# ACF/PACF plots for daily vaccinations per million, for the original series
# and its first difference.
# NOTE(review): statements were fused onto one line in the source (parse error);
# restored one statement per line. "Differented" corrected to "Differenced".
d_vacc_acf <- ggAcf(d_vacc_ts) +
  ggtitle("ACF Plot for Daily Vaccinations per Million") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
d_vacc_acf1 <- ggAcf(diff(d_vacc_ts)) +
  ggtitle("ACF Plot for Differenced Daily Vaccinations per Million") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
d_vacc_pacf <- ggPacf(d_vacc_ts) +
  ggtitle("PACF Plot for Daily Vaccinations per Million") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
d_vacc_pacf1 <- ggPacf(diff(d_vacc_ts)) +
  ggtitle("PACF Plot for Differenced Daily Vaccinations per Million") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
# Undifferenced ACF (top) and PACF (bottom).
grid.arrange(d_vacc_acf, d_vacc_pacf, nrow = 2)
Code
# Differenced-series ACF (top) and PACF (bottom), stacked.
grid.arrange(d_vacc_acf1, d_vacc_pacf1, nrow=2)
Code
# ACF/PACF plots for people vaccinated per hundred, for the original series
# and its first difference.
# NOTE(review): statements were fused onto one line in the source (parse error);
# restored one statement per line. "Differented" corrected to "Differenced".
p_vacc_acf <- ggAcf(p_vacc_ts) +
  ggtitle("ACF Plot for People Vaccinated per Hundred") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
p_vacc_acf1 <- ggAcf(diff(p_vacc_ts)) +
  ggtitle("ACF Plot for Differenced People Vaccinated per Hundred") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
p_vacc_pacf <- ggPacf(p_vacc_ts) +
  ggtitle("PACF Plot for People Vaccinated per Hundred") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
p_vacc_pacf1 <- ggPacf(diff(p_vacc_ts)) +
  ggtitle("PACF Plot for Differenced People Vaccinated per Hundred") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
# Undifferenced ACF (top) and PACF (bottom).
grid.arrange(p_vacc_acf, p_vacc_pacf, nrow = 2)
Code
# Differenced-series ACF (top) and PACF (bottom), stacked.
grid.arrange(p_vacc_acf1, p_vacc_pacf1, nrow=2)
Code
# ACF/PACF plots for people fully vaccinated per hundred, for the original
# series and its first difference.
# NOTE(review): statements were fused onto one line in the source — here even
# without a separating space before grid.arrange (parse error); restored one
# statement per line. "Differented" corrected to "Differenced".
pf_vacc_acf <- ggAcf(pf_vacc_ts) +
  ggtitle("ACF Plot for People Fully Vaccinated per Hundred") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
pf_vacc_acf1 <- ggAcf(diff(pf_vacc_ts)) +
  ggtitle("ACF Plot for Differenced People Fully Vaccinated per Hundred") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
pf_vacc_pacf <- ggPacf(pf_vacc_ts) +
  ggtitle("PACF Plot for People Fully Vaccinated per Hundred") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
pf_vacc_pacf1 <- ggPacf(diff(pf_vacc_ts)) +
  ggtitle("PACF Plot for Differenced People Fully Vaccinated per Hundred") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
# Undifferenced ACF (top) and PACF (bottom).
grid.arrange(pf_vacc_acf, pf_vacc_pacf, nrow = 2)
Code
# Differenced-series ACF (top) and PACF (bottom), stacked.
grid.arrange(pf_vacc_acf1, pf_vacc_pacf1, nrow=2)
Code
# ACF/PACF plots for newly confirmed cases, for the original series and its
# first difference.
# NOTE(review): statements were fused onto one line in the source (parse error);
# restored one statement per line. "Differented" corrected to "Differenced".
case_acf <- ggAcf(case_ts) +
  ggtitle("ACF Plot for Newly Confirmed Cases") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
case_acf1 <- ggAcf(diff(case_ts)) +
  ggtitle("ACF Plot for Differenced Newly Confirmed Cases") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
case_pacf <- ggPacf(case_ts) +
  ggtitle("PACF Plot for Newly Confirmed Cases") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
case_pacf1 <- ggPacf(diff(case_ts)) +
  ggtitle("PACF Plot for Differenced Newly Confirmed Cases") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
# Undifferenced ACF (top) and PACF (bottom).
grid.arrange(case_acf, case_pacf, nrow = 2)
Code
# Differenced-series ACF (top) and PACF (bottom), stacked.
grid.arrange(case_acf1, case_pacf1, nrow=2)
Code
# ACF/PACF plots for dead cases, for the original series and its first
# difference.
# NOTE(review): statements were fused onto one line in the source (parse error);
# restored one statement per line. "Differented" corrected to "Differenced".
dead_acf <- ggAcf(dead_ts) +
  ggtitle("ACF Plot for Dead Cases") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
dead_acf1 <- ggAcf(diff(dead_ts)) +
  ggtitle("ACF Plot for Differenced Dead Cases") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
dead_pacf <- ggPacf(dead_ts) +
  ggtitle("PACF Plot for Dead Cases") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
dead_pacf1 <- ggPacf(diff(dead_ts)) +
  ggtitle("PACF Plot for Differenced Dead Cases") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
# Undifferenced ACF (top) and PACF (bottom).
grid.arrange(dead_acf, dead_pacf, nrow = 2)
Code
# Differenced-series ACF (top) and PACF (bottom), stacked.
grid.arrange(dead_acf1, dead_pacf1, nrow=2)
Code
# ACF/PACF plots for number of inpatient beds, for the original series and its
# first difference.
# NOTE(review): statements were fused onto one line in the source (parse error);
# restored one statement per line. "Differented" corrected to "Differenced".
hos1_acf <- ggAcf(hos_ts1) +
  ggtitle("ACF Plot for Number of Inpatient Beds") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
hos1_acf1 <- ggAcf(diff(hos_ts1)) +
  ggtitle("ACF Plot for Differenced Number of Inpatient Beds") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
hos1_pacf <- ggPacf(hos_ts1) +
  ggtitle("PACF Plot for Number of Inpatient Beds") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
hos1_pacf1 <- ggPacf(diff(hos_ts1)) +
  ggtitle("PACF Plot for Differenced Number of Inpatient Beds") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
# Undifferenced ACF (top) and PACF (bottom).
grid.arrange(hos1_acf, hos1_pacf, nrow = 2)
Code
# Differenced-series ACF (top) and PACF (bottom), stacked.
grid.arrange(hos1_acf1, hos1_pacf1, nrow=2)
Code
# ACF/PACF plots for number of inpatient beds used for COVID, for the original
# series and its first difference.
# NOTE(review): statements were fused onto one line in the source (parse error);
# restored one statement per line. "Differented" corrected to "Differenced".
hos2_acf <- ggAcf(hos_ts2) +
  ggtitle("ACF Plot for Number of Inpatient Beds Used for COVID") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
hos2_acf1 <- ggAcf(diff(hos_ts2)) +
  ggtitle("ACF Plot for Differenced Number of Inpatient Beds Used for COVID") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
hos2_pacf <- ggPacf(hos_ts2) +
  ggtitle("PACF Plot for Number of Inpatient Beds Used for COVID") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
hos2_pacf1 <- ggPacf(diff(hos_ts2)) +
  ggtitle("PACF Plot for Differenced Number of Inpatient Beds Used for COVID") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
# Undifferenced ACF (top) and PACF (bottom).
grid.arrange(hos2_acf, hos2_pacf, nrow = 2)
Code
# Differenced-series ACF (top) and PACF (bottom), stacked.
grid.arrange(hos2_acf1, hos2_pacf1, nrow=2)
Code
# ACF/PACF plots for the utilization rate of inpatient beds for COVID, for the
# original series and its first difference.
# NOTE(review): statements were fused onto one line in the source (parse error);
# restored one statement per line. "Differented" corrected to "Differenced".
hos3_acf <- ggAcf(hos_ts3) +
  ggtitle("ACF Plot for Utilization Rate of Inpatient Beds for COVID") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
hos3_acf1 <- ggAcf(diff(hos_ts3)) +
  ggtitle("ACF Plot for Differenced Utilization Rate of Inpatient Beds for COVID") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
hos3_pacf <- ggPacf(hos_ts3) +
  ggtitle("PACF Plot for Utilization Rate of Inpatient Beds for COVID") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
hos3_pacf1 <- ggPacf(diff(hos_ts3)) +
  ggtitle("PACF Plot for Differenced Utilization Rate of Inpatient Beds for COVID") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
# Undifferenced ACF (top) and PACF (bottom).
grid.arrange(hos3_acf, hos3_pacf, nrow = 2)
Code
# Differenced-series ACF (top) and PACF (bottom), stacked.
grid.arrange(hos3_acf1, hos3_pacf1, nrow=2)
Code
# ACF/PACF plots for the unemployment rate, for the original series and its
# first difference.
# NOTE(review): statements were fused onto one line in the source (parse error);
# restored one statement per line. "Differented" corrected to "Differenced".
emp_acf <- ggAcf(unemploy_ts) +
  ggtitle("ACF Plot for Unemployment Rate") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
emp_acf1 <- ggAcf(diff(unemploy_ts)) +
  ggtitle("ACF Plot for Differenced Unemployment Rate") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
emp_pacf <- ggPacf(unemploy_ts) +
  ggtitle("PACF Plot for Unemployment Rate") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
emp_pacf1 <- ggPacf(diff(unemploy_ts)) +
  ggtitle("PACF Plot for Differenced Unemployment Rate") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
# Undifferenced ACF (top) and PACF (bottom).
grid.arrange(emp_acf, emp_pacf, nrow = 2)
Code
# Differenced-series ACF (top) and PACF (bottom), stacked.
grid.arrange(emp_acf1, emp_pacf1, nrow=2)
Code
# ACF/PACF plots for the Pfizer stock price, for the original series and its
# first difference.
# NOTE(review): statements were fused onto one line in the source (parse error);
# restored one statement per line. "Differented" corrected to "Differenced".
stock_acf <- ggAcf(stock_ts) +
  ggtitle("ACF Plot for Pfizer Stock Price") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
stock_acf1 <- ggAcf(diff(stock_ts)) +
  ggtitle("ACF Plot for Differenced Pfizer Stock Price") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
stock_pacf <- ggPacf(stock_ts) +
  ggtitle("PACF Plot for Pfizer Stock Price") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
stock_pacf1 <- ggPacf(diff(stock_ts)) +
  ggtitle("PACF Plot for Differenced Pfizer Stock Price") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
# Undifferenced ACF (top) and PACF (bottom).
grid.arrange(stock_acf, stock_pacf, nrow = 2)
Code
# Differenced-series ACF (top) and PACF (bottom), stacked.
grid.arrange(stock_acf1, stock_pacf1, nrow=2)
Code
# ACF/PACF plots for the Democratic support rate, for the original series and
# its first difference.
# NOTE(review): statements were fused onto one line in the source (parse error);
# restored one statement per line. "Differented" corrected to "Differenced".
demo_acf <- ggAcf(demo_ts) +
  ggtitle("ACF Plot for Support Rate for Democratic") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
demo_acf1 <- ggAcf(diff(demo_ts)) +
  ggtitle("ACF Plot for Differenced Support Rate for Democratic") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
demo_pacf <- ggPacf(demo_ts) +
  ggtitle("PACF Plot for Support Rate for Democratic") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
demo_pacf1 <- ggPacf(diff(demo_ts)) +
  ggtitle("PACF Plot for Differenced Support Rate for Democratic") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
# Undifferenced ACF (top) and PACF (bottom).
grid.arrange(demo_acf, demo_pacf, nrow = 2)
Code
# Differenced-series ACF (top) and PACF (bottom), stacked.
grid.arrange(demo_acf1, demo_pacf1, nrow=2)
Code
# ACF/PACF plots for the Independent support rate, for the original series and
# its first difference.
# NOTE(review): statements were fused onto one line in the source — here even
# without a separating space before grid.arrange (parse error); restored one
# statement per line. "Differented" corrected to "Differenced".
inde_acf <- ggAcf(inde_ts) +
  ggtitle("ACF Plot for Support Rate for Independent") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
inde_acf1 <- ggAcf(diff(inde_ts)) +
  ggtitle("ACF Plot for Differenced Support Rate for Independent") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
inde_pacf <- ggPacf(inde_ts) +
  ggtitle("PACF Plot for Support Rate for Independent") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
inde_pacf1 <- ggPacf(diff(inde_ts)) +
  ggtitle("PACF Plot for Differenced Support Rate for Independent") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
# Undifferenced ACF (top) and PACF (bottom).
grid.arrange(inde_acf, inde_pacf, nrow = 2)
Code
# Differenced-series ACF (top) and PACF (bottom), stacked.
grid.arrange(inde_acf1, inde_pacf1, nrow=2)
Code
# ACF/PACF plots for the Republican support rate, for the original series and
# its first difference.
# NOTE(review): statements were fused onto one line in the source (parse error);
# restored one statement per line. "Differented" corrected to "Differenced".
rep_acf <- ggAcf(rep_ts) +
  ggtitle("ACF Plot for Support Rate for Republican") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
rep_acf1 <- ggAcf(diff(rep_ts)) +
  ggtitle("ACF Plot for Differenced Support Rate for Republican") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
rep_pacf <- ggPacf(rep_ts) +
  ggtitle("PACF Plot for Support Rate for Republican") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
rep_pacf1 <- ggPacf(diff(rep_ts)) +
  ggtitle("PACF Plot for Differenced Support Rate for Republican") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
# Undifferenced ACF (top) and PACF (bottom).
grid.arrange(rep_acf, rep_pacf, nrow = 2)
Code
# Differenced-series ACF (top) and PACF (bottom), stacked.
grid.arrange(rep_acf1, rep_pacf1, nrow=2)
Number of Daily Vaccinations Per Million: The ACF plot has significant lags at 1 and 2, so q = 1, 2; the PACF plot has significant lags at 1 and 2, so p = 1, 2 (p is read from the PACF and q from the ACF, per the convention in Section 4). Regarding stationarity, the ACF plot reveals autocorrelation values surpassing the threshold represented by the dashed line. Significant, slowly decaying autocorrelation across multiple lags indicates non-stationarity rather than stationarity, so the series should be differenced before effective modeling and forecasting.
Number of People Vaccinated Per Hundred: The ACF plot has significant lags at 1-3, so q = 1, 2, 3; the PACF plot has a significant lag at 1, so p = 1. Regarding stationarity, the ACF plot reveals autocorrelation values surpassing the threshold represented by the dashed line. Significant, slowly decaying autocorrelation across multiple lags indicates non-stationarity rather than stationarity, so the series should be differenced before effective modeling and forecasting.
Number of People Fully Vaccinated Per Hundred: The ACF plot has significant lags at 1-3, so q = 1, 2, 3; the PACF plot has a significant lag at 1, so p = 1. Regarding stationarity, the ACF plot reveals autocorrelation values surpassing the threshold represented by the dashed line. Significant, slowly decaying autocorrelation across multiple lags indicates non-stationarity rather than stationarity, so the series should be differenced before effective modeling and forecasting.
Number of Newly Confirmed Cases: The ACF plot has significant lags at 1-10, so q = 1, 2, ..., 10; however, in general we care about only the first couple of lags, in this case the first 3. The PACF plot has a significant lag at 1, so p = 1. Regarding stationarity, the ACF plot reveals autocorrelation values surpassing the threshold represented by the dashed line. Significant, slowly decaying autocorrelation across multiple lags indicates non-stationarity rather than stationarity, so the series should be differenced before effective modeling and forecasting.
Number of Death Cases: The ACF plot has significant lags at 1-10, so q = 1, 2, ..., 10; however, in general we care about only the first couple of lags, in this case the first 3. The PACF plot has a significant lag at 1, so p = 1. Regarding stationarity, the ACF plot reveals autocorrelation values surpassing the threshold represented by the dashed line. Significant, slowly decaying autocorrelation across multiple lags indicates non-stationarity rather than stationarity, so the series should be differenced before effective modeling and forecasting.
Number of Inpatient Beds: The ACF plot has significant lags at 1 and 2, so q = 1, 2; the PACF plot has significant lags at 1 and 4, so p = 1, 4. Regarding stationarity, the ACF plot reveals autocorrelation values surpassing the threshold represented by the dashed line. Significant, slowly decaying autocorrelation across multiple lags indicates non-stationarity rather than stationarity, so the series should be differenced before effective modeling and forecasting.
Number of Inpatient Beds Used for COVID: The ACF plot has significant lags at 1 and 2, so q = 1, 2; the PACF plot has significant lags at 1 and 2, so p = 1, 2. Regarding stationarity, the ACF plot reveals autocorrelation values surpassing the threshold represented by the dashed line. Significant, slowly decaying autocorrelation across multiple lags indicates non-stationarity rather than stationarity, so the series should be differenced before effective modeling and forecasting.
Utilization Rate for Inpatient Beds Used for COVID: The ACF plot has a significant lag at 1, so q = 1; the PACF plot has significant lags at 1-3, so p = 1, 2, 3. Regarding stationarity, the ACF plot reveals autocorrelation values surpassing the threshold represented by the dashed line. Significant, slowly decaying autocorrelation across multiple lags indicates non-stationarity rather than stationarity, so the series should be differenced before effective modeling and forecasting.
Unemployment Rate: The ACF plot has a significant lag at 1, so q = 1; the PACF plot has a significant lag at 1, so p = 1. Regarding stationarity, the ACF plot reveals autocorrelation values surpassing the threshold represented by the dashed line. Significant autocorrelation beyond the threshold warrants caution about stationarity; the ADF test below provides a formal check.
Pfizer Stock Price: The ACF and PACF plots provide critical insights for determining the parameters of our time series model. The ACF plot shows significant lags at 1-10, suggesting candidate values of q up to 10 for the MA component; the PACF plot shows a significant lag at 1, indicating p = 1 for the AR component. The many significant, slowly decaying autocorrelations in the ACF plot indicate non-stationarity — consistent with the ADF test below (p ≈ 0.14) — so the series requires differencing before effective modeling and forecasting.
Support Rate for Democratic: There are no lags over the dashed line in the ACF plot, which indicates that there is no significant autocorrelation in the series beyond the lag indicated by the highest peak. In this case, the ACF plot suggests that there is no systematic relationship between the observations at different time points. This lack of autocorrelation implies that the series is likely stationary, as there is no discernible pattern of dependence between consecutive observations.
Support Rate for Independent: There are no lags over the dashed line in the ACF plot, which indicates that there is no significant autocorrelation in the series beyond the lag indicated by the highest peak. In this case, the ACF plot suggests that there is no systematic relationship between the observations at different time points. This lack of autocorrelation implies that the series is likely stationary, as there is no discernible pattern of dependence between consecutive observations.
Support Rate for Republican: The ACF and PACF plots provide critical insights for determining the parameters of our time series model. The ACF plot shows a significant lag at 1, suggesting q = 1 for the MA component; the PACF plot shows a significant lag at 1, indicating p = 1 for the AR component. Autocorrelation above the threshold only at the first lag is weak evidence either way on stationarity; the ADF test below (p ≈ 0.052) narrowly fails to reject the unit-root null at the 5% level, so the series is treated as non-stationary.
Augmented Dickey-Fuller Test
data: pf_vacc_ts1
Dickey-Fuller = -7.4927, Lag order = 3, p-value = 0.01
alternative hypothesis: stationary
Code
# Augmented Dickey-Fuller test; H0: unit root (non-stationary). p < 0.05 rejects H0.
tseries::adf.test(case_ts)
Augmented Dickey-Fuller Test
data: case_ts
Dickey-Fuller = -1.8178, Lag order = 3, p-value = 0.6457
alternative hypothesis: stationary
Code
# Augmented Dickey-Fuller test; H0: unit root (non-stationary). p < 0.05 rejects H0.
tseries::adf.test(dead_ts)
Augmented Dickey-Fuller Test
data: dead_ts
Dickey-Fuller = -0.54605, Lag order = 3, p-value = 0.9754
alternative hypothesis: stationary
Code
# Augmented Dickey-Fuller test; H0: unit root (non-stationary). p < 0.05 rejects H0.
tseries::adf.test(hos_ts1)
Augmented Dickey-Fuller Test
data: hos_ts1
Dickey-Fuller = -12.058, Lag order = 3, p-value = 0.01
alternative hypothesis: stationary
Code
# Augmented Dickey-Fuller test; H0: unit root (non-stationary). p < 0.05 rejects H0.
tseries::adf.test(hos_ts2)
Augmented Dickey-Fuller Test
data: hos_ts2
Dickey-Fuller = -2.9428, Lag order = 3, p-value = 0.1968
alternative hypothesis: stationary
Code
# Augmented Dickey-Fuller test; H0: unit root (non-stationary). p < 0.05 rejects H0.
tseries::adf.test(hos_ts3)
Augmented Dickey-Fuller Test
data: hos_ts3
Dickey-Fuller = -2.8936, Lag order = 3, p-value = 0.2165
alternative hypothesis: stationary
Code
# Augmented Dickey-Fuller test; H0: unit root (non-stationary). p < 0.05 rejects H0.
tseries::adf.test(unemploy_ts)
Augmented Dickey-Fuller Test
data: unemploy_ts
Dickey-Fuller = -8.8759, Lag order = 2, p-value = 0.01
alternative hypothesis: stationary
Code
# Augmented Dickey-Fuller test; H0: unit root (non-stationary). p < 0.05 rejects H0.
tseries::adf.test(stock_ts)
Augmented Dickey-Fuller Test
data: stock_ts
Dickey-Fuller = -3.0918, Lag order = 3, p-value = 0.1381
alternative hypothesis: stationary
Code
# Augmented Dickey-Fuller test; H0: unit root (non-stationary). p < 0.05 rejects H0.
tseries::adf.test(demo_ts)
Augmented Dickey-Fuller Test
data: demo_ts
Dickey-Fuller = -3.4688, Lag order = 3, p-value = 0.05607
alternative hypothesis: stationary
Code
# Augmented Dickey-Fuller test; H0: unit root (non-stationary). p < 0.05 rejects H0.
tseries::adf.test(inde_ts)
Augmented Dickey-Fuller Test
data: inde_ts
Dickey-Fuller = -4.2027, Lag order = 3, p-value = 0.01
alternative hypothesis: stationary
Code
# Augmented Dickey-Fuller test; H0: unit root (non-stationary). p < 0.05 rejects H0.
tseries::adf.test(rep_ts)
Augmented Dickey-Fuller Test
data: rep_ts
Dickey-Fuller = -3.4937, Lag order = 3, p-value = 0.05222
alternative hypothesis: stationary
In our project, we delve into an array of statistical series to discern patterns and ascertain stationarity, crucial for understanding sentiment impacts on the stock prices of leading tech companies and broader socio-economic indicators. Our methodology employs rigorous statistical tests, complemented by Autocorrelation Function (ACF) plots, to scrutinize the data’s behavior over time.
Number of Daily Vaccinations Per Million: A p-value below 0.05 signals sufficient grounds to reject the null hypothesis at a 5% significance level, indicating stationarity in our series. This finding, however, contrasts with prior conclusions, suggesting the ACF plot’s superior accuracy, which points toward non-stationarity.
Number of People Vaccinated Per Hundred: The p-value, exceeding 0.05, reveals an insufficient basis to reject the null hypothesis, indicating a non-stationary series. This necessitates further modifications for stationarity, reinforcing conclusions from earlier analyses, including a significant lag order of 3.
Number of People Fully Vaccinated Per Hundred: With a p-value below 0.05, we find adequate evidence to reject the null hypothesis, suggesting stationarity. Yet, this contradicts previous findings, with the ACF plot indicating non-stationarity, challenging our initial conclusion.
Number of Newly Confirmed Cases: A p-value above 0.05 indicates a lack of sufficient evidence to dismiss the null hypothesis, suggesting non-stationarity. This aligns with earlier observations, necessitating adjustments for stationarity, including a noted lag order of 3.
Number of Death Cases: The p-value, again above 0.05, underscores a lack of adequate evidence to reject the null hypothesis, signaling a non-stationary series and the need for further data adjustments. This finding is consistent with prior analyses.
Number of Inpatient Beds: Here, a p-value below 0.05 provides enough justification to reject the null hypothesis, suggesting a stationary series. Nevertheless, this result is at odds with previous analyses, indicating non-stationarity based on the ACF plot.
Number of Inpatient Beds Used for COVID: The p-value surpassing 0.05 suggests insufficient evidence to reject the null hypothesis, pointing to a non-stationary series that requires adjustments, corroborating earlier findings and the significance of a lag order of 3.
Utilization Rate for Inpatient Beds Used for COVID: A high p-value indicates the series’ non-stationarity, echoing the need for adjustments to achieve stationarity and supporting earlier conclusions, including a lag order of 3.
Unemployment Rate: A low p-value indicates sufficient evidence to reject the null hypothesis, suggesting stationarity. However, this contrasts with previous examples, with the ACF plot indicating non-stationarity.
Pfizer Stock Price: With a p-value exceeding 0.05, there’s insufficient evidence to reject the null hypothesis, indicating a non-stationary series requiring adjustments, consistent with earlier findings, including a lag order of 3.
Support Rate for Democratic: A high p-value reveals a lack of evidence to reject the null hypothesis, suggesting non-stationarity and the need for adjustments, contradicting earlier conclusions of stationarity.
Support Rate for Independent: A low p-value provides ample evidence to reject the null hypothesis, indicating a stationary series. This finding aligns with prior conclusions, affirming the series’ stationarity.
Support Rate for Republican: The p-value, exceeding 0.05, indicates insufficient evidence to reject the null hypothesis, suggesting a non-stationary series that necessitates adjustments, in line with earlier analyses.
Through this detailed exploration, we meticulously gauge the stationarity of diverse series, juxtaposing statistical test results against ACF plot insights to draw nuanced conclusions on the dynamic interplay between sentiment, stock price movements, and broader socio-economic indicators.
Detrending and differencing stand as pivotal techniques in the realm of time series analysis, each aimed at achieving the crucial condition of stationarity within a dataset. While navigating the same goal of trend elimination, these methodologies diverge in their approach and application nuances.
Detrending is a targeted process aimed squarely at eradicating the underlying trend from the dataset. This is accomplished by first meticulously estimating the trend component that permeates the time series and then subtracting this estimated trend from the original dataset. The outcome is a transformed series where the original mean has been adjusted to center around zero, effectively neutralizing the trend influence. However, this transformation is not a panacea; detrended data can still exhibit non-stationary characteristics, such as seasonality or variance instabilities, that require further intervention.
Conversely, differencing operates under a broader scope, addressing stationarity by focusing on the differences between consecutive observations. This method is encapsulated by the formula:
\[\Delta y_t = y_t - y_{t-1}\]
where \(\Delta y_t\) represents the difference between the current observation \(y_t\) and its predecessor \(y_{t-1}\). Through this simple yet effective mechanism, differencing excels at mitigating linear trends and highlighting the dynamic changes between data points. Its strength lies particularly in contexts where the time series displays a consistent directional trend, making it a robust choice for such scenarios.
However, it’s worth noting that while differencing is adept at ironing out linear trends, it may falter when faced with nonlinear trends or pronounced seasonal fluctuations. The essence of differencing lies in its ability to simplify the series to a form where patterns and structures become more discernable, albeit at the potential cost of oversimplification in certain complex scenarios.
The decision to employ detrending or differencing hinges on a thorough examination of the time series at hand. The specific characteristics of the dataset, including the nature of its trends and seasonalities, dictate the most appropriate method for achieving stationarity. This choice is not merely technical but strategic, laying the foundation for deeper insights and more accurate forecasts in the pursuit of time series analysis.
4. ARIMA(p,d,q)
In this section, our aim is to identify all potential values for the autoregressive (AR) parameter (p), the moving average (MA) parameter (q), and the differencing parameter (d) based on the autocorrelation function (ACF) and partial autocorrelation function (PACF) plots of the original data.
To determine the value of p, we examine the most significant lags from the PACF plot, which helps us identify the lag orders where the correlation is not accounted for by previous lags.
Conversely, for the value of q, we focus on the most significant lags from the ACF plot, indicating the correlation between observations at different time lags, which informs us about the lag orders that may require inclusion in the moving average model.
Given that we have differenced all series once, the d value is consistently set to 1. However, when evaluating the Akaike Information Criterion (AIC) and Bayesian Information Criterion (BIC) for model selection, we explore both d=0 and d=1 to ensure comprehensive assessment and comparison of model performance.
Daily vaccinations time series: - q = 0,1,2 - d = 0,1 - p = 0,1,2
People vaccinated time series: - q = 0,1,2,3 - d = 0,1 - p = 0,1
People fully vaccinated time series: - q = 0,1,2,3 - d = 0,1 - p = 0,1
Newly confirmed case time series: - q = 0,1,2,3,4,5,6,7,8,9,10 - d = 0,1 - p = 0,1
Death case time series: - q = 0,1,2,3,4,5,6,7,8,9,10 - d = 0,1 - p = 0,1
Inpatient bed time series: - q = 0,1,2 - d = 0,1 - p = 0,1
Inpatient bed used for COVID time series: - q = 0,1,2 - d = 0,1 - p = 0,1,2
Utilization rate for inpatient bed used for COVID time series: - q = 0,1 - d = 0,1 - p = 0,1,2,3
Unemployment rate time series: - q = 0,1 - d = 0,1 - p = 0,1
Pfizer stock price time series: - q = 0,1,2,3,4,5,6,7,8,9,10 - d = 0,1 - p = 0,1
Support rate for democratic time series: - q = 0,1 - d = 0,1 - p = 0,1
Support rate for independent time series: - q = 0,1 - d = 0,1 - p = 0,1
Support rate for republican time series: - q = 0,1 - d = 0,1 - p = 0,1
p d q AIC BIC AICc
16 1 1 3 95.30117 103.0762 99.50117
Code
# Grid-search row (p, d, q and scores) with the smallest BIC.
dvacc[which.min(dvacc$BIC),]
p d q AIC BIC AICc
8 0 1 3 95.34163 101.8208 98.19878
Code
# Grid-search row (p, d, q and scores) with the smallest AICc.
dvacc[which.min(dvacc$AICc),]
p d q AIC BIC AICc
8 0 1 3 95.34163 101.8208 98.19878
The model with the lowest BIC, AICc is ARIMA(0,1,3). While the model with the lowest AIC is ARIMA(1,1,3), the significantly lower BIC and AICc values of ARIMA(0,1,3) underscore its stronger performance. Therefore, based on the evaluation metrics, ARIMA(0,1,3) emerges as the optimal model.
p d q AIC BIC AICc
14 0 1 6 885.7971 899.5057 890.2971
Code
# Grid-search row (p, d, q and scores) with the smallest BIC.
dvacc[which.min(dvacc$BIC),]
p d q AIC BIC AICc
4 0 1 1 891.0065 896.1472 891.6552
Code
# Grid-search row (p, d, q and scores) with the smallest AICc.
dvacc[which.min(dvacc$AICc),]
p d q AIC BIC AICc
10 0 1 4 887.4925 897.7739 889.963
Among the ARIMA models tested, ARIMA(0,1,6) has the lowest AIC value, indicating its superior fit compared to the other models in terms of goodness of fit and complexity. Conversely, ARIMA(0,1,1) boasts the lowest BIC, while ARIMA(0,1,4) exhibits the lowest AICc. Despite these distinctions, a comprehensive evaluation considering all metrics suggests that ARIMA(0,1,6) is the optimal choice, as it strikes a balance between model complexity and performance. Therefore, ARIMA(0,1,6) emerges as the preferred model based on a holistic assessment of AIC, BIC, and AICc values.
p d q AIC BIC AICc
1 0 0 0 -120.7451 -116.9708 -119.6542
Code
# Grid-search row (p, d, q and scores) with the smallest BIC.
dvacc[which.min(dvacc$BIC),]
p d q AIC BIC AICc
1 0 0 0 -120.7451 -116.9708 -119.6542
Code
# Grid-search row (p, d, q and scores) with the smallest AICc.
dvacc[which.min(dvacc$AICc),]
p d q AIC BIC AICc
1 0 0 0 -120.7451 -116.9708 -119.6542
The model with the lowest AIC, AICc is ARIMA(1,0,1). While the model with the lowest BIC is ARIMA(0,0,0), the significantly lower AIC and AICc values of ARIMA(1,0,1) underscore its stronger performance. Therefore, based on the evaluation metrics, ARIMA(1,0,1) emerges as the optimal model.
Code
# Grid search over ARIMA(p, d, q) candidates for the differenced Pfizer stock
# series, recording AIC / BIC / AICc for each successful fit.
# NOTE(review): statements were fused onto one line in the source (parse error);
# restored one statement per line.
# NOTE(review): the outer `d <- 1` is immediately shadowed by the loop variable,
# and `dvacc` is created empty here — presumably filled from `ls` later; verify
# against the surrounding document.
d <- 1
i <- 1
dvacc <- data.frame()
# 40 (p, d, q) combinations satisfy the threshold below, so the result matrix
# needs 40 rows; the original's 39 could overflow if every fit succeeds.
ls <- matrix(rep(NA, 6 * 40), nrow = 40) # shadows base::ls — consider renaming
for (p in 0:1) {
  for (q in 0:10) {
    for (d in 0:1) {
      if (p - 1 + d + q - 1 <= 8) { # usual complexity threshold: p + d + q <= 10
        tryCatch({
          model <- Arima(diff(stock_ts), order = c(p, d, q), include.drift = TRUE)
          ls[i, ] <- c(p, d, q, model$aic, model$bic, model$aicc)
          i <- i + 1
        }, error = function(e) {
          cat("Error occurred for p =", p, ", d =", d, ", q =", q, ":",
              conditionMessage(e), "\n")
        })
      }
    }
  }
}
Error occurred for p = 1 , d = 0 , q = 1 : non-stationary AR part from CSS
p d q AIC BIC AICc
27 1 0 3 87.11988 100.0709 89.99168
Code
dvacc[which.min(dvacc$BIC),]
p d q AIC BIC AICc
22 1 0 0 89.34164 96.74223 90.29402
Code
dvacc[which.min(dvacc$AICc),]
p d q AIC BIC AICc
27 1 0 3 87.11988 100.0709 89.99168
The model with the lowest AIC, AICc is ARIMA(1,0,3). While the model with the lowest BIC is ARIMA(1,0,0), the significantly lower AIC and AICc values of ARIMA(1,0,3) underscore its stronger performance. Therefore, based on the evaluation metrics, ARIMA(1,0,3) emerges as the optimal model.
p d q AIC BIC AICc
7 1 0 1 -229.4175 -220.0615 -227.9889
Code
dvacc[which.min(dvacc$BIC),]
p d q AIC BIC AICc
7 1 0 1 -229.4175 -220.0615 -227.9889
Code
dvacc[which.min(dvacc$AICc),]
p d q AIC BIC AICc
7 1 0 1 -229.4175 -220.0615 -227.9889
The model with the lowest AIC, BIC, AICc is ARIMA(1,0,1). So the best model is ARIMA(1,0,1).
In terms of AIC, BIC and AICc, we always want to choose the lowest values, however, it can happen that the same model won’t have the lowest value for AIC, BIC, and AICc at the same time. In that case we favor the results from AIC as that is a better estimator for autoregressive models. In the next section we can see the results from the AIC-BIC analysis.
Final Selection:
Daily vaccinations time series: p=2, d=1, q=2
People vaccinated time series: p=0, d=1, q=3
People fully vaccinated time series: p=0, d=1, q=3
Newly confirmed case time series: p=0, d=1, q=2
Death case time series: p=0, d=1, q=6
Inpatient bed time series: p=0, d=1, q=1
Inpatient bed used for COVID time series: p=2, d=1, q=1
Utilization rate for inpatient bed used for COVID time series: p=2, d=0, q=0
Unemployment rate time series: p=1, d=0, q=1
Pfizer stock price time series: p=1, d=0, q=3
Support rate for democratic time series: p=0, d=0, q=1
Support rate for independent time series: p=0, d=0, q=1
Support rate for republican time series: p=1, d=0, q=1
Series: diff(demo_ts)
ARIMA(0,0,1) with drift
Coefficients:
ma1 intercept drift
-1.0000 -9e-04 0
s.e. 0.0577 NaN NaN
sigma^2 = 0.0004267: log likelihood = 117.72
AIC=-227.44 AICc=-226.51 BIC=-219.96
Training set error measures:
ME RMSE MAE MPE MAPE MASE ACF1
Training set -0.0002909499 0.02000063 0.0157191 NaN Inf 0.5052568 0.002088654
The equation for the model is: \[x_t = w_t - 1.0000w_{t-1} \]
Series: diff(inde_ts)
ARIMA(0,0,1) with drift
Coefficients:
ma1 intercept drift
-1.0000 0.0011 0
s.e. 0.0591 NaN NaN
sigma^2 = 0.001398: log likelihood = 89.24
AIC=-170.48 AICc=-169.55 BIC=-162.99
Training set error measures:
ME RMSE MAE MPE MAPE MASE ACF1
Training set -0.001342649 0.03620296 0.02893673 -Inf Inf 0.4451804 0.06199612
The equation for the model is: \[x_t = w_t - 1.0000w_{t-1} \]
Series: diff(rep_ts)
ARIMA(1,0,1) with drift
Coefficients:
ar1 ma1 intercept drift
0.4095 -1.0000 1e-04 0
s.e. 0.1345 0.0627 NaN NaN
sigma^2 = 0.0004088: log likelihood = 119.71
AIC=-229.42 AICc=-227.99 BIC=-220.06
Training set error measures:
ME RMSE MAE MPE MAPE MASE ACF1
Training set 0.0007249485 0.01935823 0.01596133 NaN Inf 0.6245738 0.06783213
The equation for the model is: \[x_t = 0.4095x_{t-1} + w_t - 1.0000w_{t-1} \]
The Standard Residual Plot presents an encouraging picture, exhibiting characteristics of good stationarity with a relatively constant mean and variance. This stability is a positive sign for the model’s accuracy. Furthermore, the Autocorrelation Function (ACF) plot reinforces this positive assessment by showing a lack of significant correlation among the residuals, suggesting that the model has effectively captured the underlying patterns in the data, leaving behind what appears to be mere white noise. This is indicative of an exceptionally well-fitted model.
The Quantile-Quantile (Q-Q) Plot also leans towards a favorable evaluation, demonstrating a reasonable approximation of normality, though with some variability. This slight deviation does not detract from the overall model’s effectiveness.
However, the Ljung-Box test introduces a layer of complexity with its results. Despite some variation, the test’s p-values exceed the 0.05 threshold (aligned with a 5% significance level), implying a lack of significant autocorrelation in the residuals. This outcome, coupled with the model’s coefficient p-values falling below the 0.05 mark, further validates the model’s robustness, suggesting a commendable fit to the observed data.
The Standard Residual Plot demonstrates commendable stationarity, with a consistently constant mean and variance, suggesting the model’s robustness in capturing the data’s central tendencies and spread. The Autocorrelation Function (ACF) plot further reinforces the model’s efficacy, showing negligible correlation among residuals and implying that the model has adeptly isolated and left behind only white noise. This is indicative of an exceptionally well-fitted model.
In the realm of normality assessment, the Quantile-Quantile (Q-Q) Plot exhibits a satisfactory alignment with normality, although there is room for improvement in mirroring the ideal normal distribution curve more closely. The Ljung-Box test results introduce a nuanced perspective, with values straddling the 0.05 (5% significance) threshold. This indicates a lack of substantial autocorrelation, underscoring the model’s aptitude in fitting the data without overlooking significant patterns.
The analysis of Moving Average parameters reveals a differentiated significance; while the p-values for ma1 and ma2 do not denote statistical significance, falling above the 0.05 threshold, the p-value for ma3 stands out by being less than 0.05. This suggests that while the first two parameters may not contribute significantly to the model, ma3 plays a crucial role, offering insights into the subtleties of the model’s fit and the dynamics captured by this specific parameter.
The Standard Residual Plot presents an encouraging picture of stationarity, characterized by a largely constant mean and variation, indicative of a robust model. The Autocorrelation Function (ACF) plot further bolsters this assessment, displaying an absence of correlation and suggesting that the model has effectively captured the underlying process, leaving only white noise. This is a strong marker of an excellently fitted model. While the Quantile-Quantile (Q-Q) Plot demonstrates a commendable degree of normality, minor deviations are observable, pointing towards an area for potential refinement.
The Ljung-Box test results introduce an element of variability, with values crossing the 0.05 (5% significance) threshold, yet this does not denote significant autocorrelation, reinforcing the model’s adequacy. Although the p-values for ma1 and ma2 slightly exceed 0.05, the p-value for ma3 falls below this mark, suggesting that while certain model parameters may edge towards marginal significance, the overall model integrity remains intact, pointing towards a well-specified model.
converged
<><><><><><><><><><><><><><>
Coefficients:
Estimate SE t.value p.value
ma1 -0.0254 0.1347 -0.1884 0.8515
ma2 -0.6635 0.1331 -4.9840 0.0000
constant -5440.8755 112821.2319 -0.0482 0.9618
sigma^2 estimated as 4.246465e+12 on 38 degrees of freedom
AIC = 32.13855 AICc = 32.15437 BIC = 32.30573
The Standard Residual Plot presents a promising depiction of stationarity, characterized by a consistent mean and variation across the board. The Autocorrelation Function (ACF) plot reinforces this positive assessment, showing no discernible correlation and implying that all residual patterns have been effectively captured by the model, leaving behind only white noise. This is a strong indicator of an excellent model fit.
Further analysis through the Quantile-Quantile (Q-Q) Plot suggests a satisfactory alignment with normality, although there is room for slight improvement. The Ljung-Box test results introduce some variability, with values surpassing the 0.05 threshold (indicative of a 5% significance level). This outcome points to the lack of substantial correlation, further affirming the model’s aptness.
Regarding the moving average parameters, the p-values associated with ma1 exceed the 0.05 mark, contrasting with ma2’s p-value, which falls below this threshold. This differential suggests a nuanced interplay within the model’s components, highlighting areas of both strength and potential refinement.

### Death case time series
The Standard Residual Plot presents a promising outlook, showcasing good signs of stationarity with a consistent mean and variation over time. In the Autocorrelation Function (ACF) plot, the absence of significant correlation further supports the efficacy of our model, suggesting it has successfully captured the underlying patterns in the data, leaving only white noise behind. This is indicative of an excellent model fit. While the Quantile-Quantile (Q-Q) Plot largely aligns with expectations of normality, displaying satisfactory adherence, there is still some variation observed.
Diving deeper into the diagnostic checks, the Ljung-Box test yields intriguing results, with values surpassing the 0.05 threshold (5% significance level). This indicates a lack of significant autocorrelation, reinforcing the model’s adequacy. However, an analysis of the Moving Average (MA) parameters reveals a nuanced picture: while the p-values for ma2 and ma5 exceed the 0.05 mark, suggesting these terms may not contribute significantly to the model, the p-values for ma1, ma3, ma4, and ma6 fall below this threshold, indicating their importance in the model’s structure. This mixed outcome highlights areas for potential refinement and underscores the model’s overall robustness.
converged
<><><><><><><><><><><><><><>
Coefficients:
Estimate SE t.value p.value
ma1 -0.8352 0.0997 -8.3758 0.0000
constant -2100.3772 1731.5890 -1.2130 0.2315
sigma^2 estimated as 4024330014 on 45 degrees of freedom
AIC = 25.1066 AICc = 25.11241 BIC = 25.2247
The Standard Residual Plot presents a positive indication of stationarity, characterized by a consistent mean and variance throughout. This suggests a stable model performance over time. The Autocorrelation Function (ACF) plot reinforces this assessment, showing an absence of correlation among residuals and implying that the model has effectively captured the underlying pattern, leaving only white noise behind. This is a hallmark of an excellently fitted model.
Furthermore, the Quantile-Quantile (Q-Q) Plot demonstrates commendable adherence to normality, albeit with some minor deviations. The consistency in the plot underscores the model’s reliability in normal distribution assumptions. However, the Ljung-Box test introduces a nuanced perspective, displaying values that surpass the 0.05 threshold (5% significance level). This indicates a lack of significant autocorrelation, reinforcing the model’s adeptness at fitting the data effectively, as further evidenced by the ma1 coefficient’s p-value falling below 0.05.
The Standard Residual Plot presents a promising picture, showcasing characteristics of good stationarity with a largely consistent mean and variance across the series. The Autocorrelation Function (ACF) plot further strengthens our confidence in the model’s robustness by displaying negligible correlation, which implies that the model has successfully captured the underlying pattern, leaving only white noise behind. This is indicative of an exceptionally well-fitted model. Meanwhile, the Quantile-Quantile (Q-Q) Plot also leans towards demonstrating commendable normality, albeit with some deviations. The Ljung-Box test results introduce a slight variance, displaying values surpassing the 0.05 threshold (at a 5% significance level), which denotes the lack of substantial correlation—another hallmark of a model that is fitting well. Although the p-value for ar1 marginally exceeds 0.05, the p-values for ar2 and ma1 are below this threshold, further underscoring the model’s efficacy and the accuracy of its fit.
converged
<><><><><><><><><><><><><><>
Coefficients:
Estimate SE t.value p.value
ar1 0.3115 0.1242 2.5083 0.0158
ar2 -0.4916 0.1224 -4.0165 0.0002
xmean 0.0005 0.0028 0.1899 0.8502
sigma^2 estimated as 0.000494291 on 45 degrees of freedom
AIC = -4.595389 AICc = -4.584025 BIC = -4.439455
The Standard Residual Plot presents a promising depiction of stationarity, characterized by a largely constant mean and variance, suggesting the model’s effectiveness in capturing the data’s essence. The Autocorrelation Function (ACF) plot further strengthens this assessment, displaying negligible correlation among residuals and implying that the model residuals resemble white noise—a hallmark of an excellent model fit. Meanwhile, the Quantile-Quantile (Q-Q) Plot offers substantial evidence of normality, albeit with some variability. This is complemented by the outcomes of the Ljung-Box test, which, despite variations, predominantly reports p-values above the 0.05 mark (5% significance level). Such results underscore a lack of significant autocorrelation within the residuals, affirming the model’s robustness and precision in fitting the data.
converged
<><><><><><><><><><><><><><>
Coefficients:
Estimate SE t.value p.value
ar1 0.0308 0.2012 0.1533 0.8796
ma1 -1.0000 0.1216 -8.2224 0.0000
xmean -0.0005 0.0006 -0.8403 0.4098
sigma^2 estimated as 0.0004656674 on 22 degrees of freedom
AIC = -4.386209 AICc = -4.340495 BIC = -4.191189
After implementing first-order differencing, the unemployment rate time series continued to exhibit non-stationary characteristics, prompting the necessity for a second differencing step to attain stationarity. Post-differencing, the Standard Residual Plot demonstrated commendable stationarity, characterized by a mostly constant mean and variance, indicative of a well-adjusted series. The Autocorrelation Function (ACF) plot, revealing no significant correlations, suggests that the model has effectively captured the underlying patterns within the data, leaving behind what appears to be purely white noise. This is a strong indicator of an excellently fitted model.
Additionally, the Quantile-Quantile (Q-Q) Plot shows a satisfactory alignment with normality, though with some deviations. The results from the Ljung-Box test varied, presenting values surpassing the 0.05 threshold (5% significance level), which points to the absence of significant autocorrelations and underscores the model’s adequacy. The analysis of parameter significance revealed that the p-value for the autoregressive term (ar1) exceeded 0.05, suggesting it might not contribute significantly to the model, whereas the moving average term (ma1), with a p-value below 0.05, indicates a meaningful contribution. This nuanced understanding of the model’s components further attests to its robustness in capturing the dynamics of the unemployment rate time series.
The Standard Residual Plot presents a promising indication of stationarity, with the mean and variance appearing mostly constant throughout. This uniformity in the residuals suggests a stable model performance over time. Meanwhile, the Autocorrelation Function (ACF) plot reveals an absence of correlation among the residuals, implying that the model has effectively captured the underlying patterns in the data, leaving behind what appears to be pure white noise. Such an outcome is indicative of an excellently fitted model.
On the other hand, the Quantile-Quantile (Q-Q) Plot showcases a decent approximation to normality, although there is some variability. This suggests that while the model’s residuals closely follow a normal distribution, there are areas of deviation worth noting.
Moreover, the results from the Ljung-Box test introduce some variability, with p-values surpassing the 0.05 threshold (5% significance level). Because values above this threshold signify the absence of significant autocorrelation in the residuals, this further affirms the model’s adequacy. Notably, the p-values for the model’s moving-average coefficients fall below the 0.05 mark, reinforcing the statistical strength and the well-fitted nature of the model.
converged
<><><><><><><><><><><><><><>
Coefficients:
Estimate SE t.value p.value
ma1 -0.9999 0.0732 -13.6622 0.0000
xmean 0.0002 0.0003 0.8221 0.4153
sigma^2 estimated as 0.0004178928 on 46 degrees of freedom
AIC = -4.736419 AICc = -4.730863 BIC = -4.619469
The Standard Residual Plot exhibits commendable stationarity, characterized by a consistent mean and variance throughout, indicative of a robust model performance. The Autocorrelation Function (ACF) plot further reinforces this by demonstrating an absence of correlation among residuals, thereby suggesting that the model has effectively captured the underlying data patterns, leaving behind only white noise. This is a hallmark of an excellently fitted model. Meanwhile, the Quantile-Quantile (Q-Q) Plot generally aligns with expectations of normality, although minor deviations are observed, which is typical in practical scenarios. The Ljung-Box test results introduce some variability, with certain values crossing the 0.05 (5% significance) threshold. However, the predominance of p-values above this threshold underscores the model’s ability to adequately represent the data without significant autocorrelation among residuals, cementing its status as well-calibrated and fitting.
converged
<><><><><><><><><><><><><><>
Coefficients:
Estimate SE t.value p.value
ma1 -1e+00 0.0635 -15.7571 0.0000
xmean 3e-04 0.0004 0.7764 0.4415
sigma^2 estimated as 0.001318368 on 46 degrees of freedom
AIC = -3.587405 AICc = -3.581849 BIC = -3.470454
The Standard Residual Plot presents a commendable depiction of stationarity, with the mean and variance remaining mostly constant throughout, suggesting that the data points fluctuate around a steady level. The Autocorrelation Function (ACF) plot further reinforces the model’s efficacy by exhibiting negligible correlation among residuals, indicating that the model has successfully captured the underlying pattern in the data, leaving behind what appears to be mere white noise. This observation underscores the model’s robust fit to the data.
In the Quantile-Quantile (Q-Q) Plot, we observe a satisfactory alignment with normality, although there are minor deviations. Such variations are typical and do not significantly detract from the model’s overall performance.
However, the results from the Ljung-Box test introduce a layer of complexity, displaying values that occasionally surpass the 0.05 threshold, which typically denotes a 5% significance level. Despite these variations, the predominance of p-values greater than 0.05 throughout our analysis provides strong evidence against significant autocorrelation among residuals, further affirming the model’s aptitude in capturing the essence of the dataset without overfitting.
Collectively, these diagnostic tools paint a picture of a well-adjusted model, adept at navigating through the intricacies of the data to offer valuable insights, albeit with room for minor improvements as indicated by the Q-Q plot and Ljung-Box test variations.
converged
<><><><><><><><><><><><><><>
Coefficients:
Estimate SE t.value p.value
ar1 0.41 0.1351 3.0350 0.0040
ma1 -1.00 0.0628 -15.9210 0.0000
xmean 0.00 0.0003 0.0824 0.9347
sigma^2 estimated as 0.0003748071 on 45 degrees of freedom
AIC = -4.821046 AICc = -4.809682 BIC = -4.665112
The Standard Residual Plot presents a promising outlook, showcasing robust stationarity characterized by a largely constant mean and variance, indicative of a well-behaved model. The Autocorrelation Function (ACF) plot further corroborates this by revealing negligible correlation, implying that the residuals amount to white noise and underscoring the model’s comprehensive capture of underlying patterns—a hallmark of excellent model fit. Meanwhile, the Quantile-Quantile (Q-Q) Plot demonstrates commendable adherence to normality, albeit with minor deviations. The Ljung-Box test results introduce a nuance, exhibiting values surpassing the 0.05 threshold (at a 5% significance level), thereby negating the presence of substantial autocorrelation and endorsing the model’s aptness. Crucially, the p-values for the ar1 and ma1 coefficients fall below the 0.05 mark, reinforcing the statistical soundness of our model.
Series: diff(d_vacc_ts)
ARIMA(0,0,2) with zero mean
Coefficients:
ma1 ma2
0.8344 0.5402
s.e. 0.1780 0.2384
sigma^2 = 3.458e+09: log likelihood = -359.11
AIC=724.22 AICc=725.18 BIC=728.32
The best model from the step above was ARIMA(2,1,2), while the best model Auto ARIMA gave me is ARIMA(0,0,2) with zero mean. This discrepancy raises concerns about reliability, as Auto ARIMA tends to overlook instances of significant lag correlation, as evidenced by the ACF/PACF plots. Instead, it prioritizes minimizing AIC/BIC values without considering the full spectrum of model dynamics. This narrow focus risks recommending a model prone to overfitting, lacking in the comprehensive assessment necessary for accurate forecasting.
The best model from the step above was ARIMA(0,1,3), while the best model Auto ARIMA gave me is ARIMA(0,1,1) with drift. This discrepancy raises concerns about reliability, as Auto ARIMA tends to overlook instances of significant lag correlation, as evidenced by the ACF/PACF plots. Instead, it prioritizes minimizing AIC/BIC values without considering the full spectrum of model dynamics. This narrow focus risks recommending a model prone to overfitting, lacking in the comprehensive assessment necessary for accurate forecasting.
The best model from the step above was ARIMA(0,1,3), while the best model Auto ARIMA gave me is ARIMA(0,1,2) with drift. This discrepancy raises concerns about reliability, as Auto ARIMA tends to overlook instances of significant lag correlation, as evidenced by the ACF/PACF plots. Instead, it prioritizes minimizing AIC/BIC values without considering the full spectrum of model dynamics. This narrow focus risks recommending a model prone to overfitting, lacking in the comprehensive assessment necessary for accurate forecasting.
Code
auto.arima(diff(case_ts))
Series: diff(case_ts)
ARIMA(0,0,1) with non-zero mean
Coefficients:
ma1 mean
0.8182 2324827
s.e. 0.0780 554675
sigma^2 = 4.192e+12: log likelihood = -669.47
AIC=1344.95 AICc=1345.58 BIC=1350.16
The best model from the step above was ARIMA(0,1,2), while the best model Auto ARIMA gave me is ARIMA(0,0,1) with non-zero mean. This discrepancy raises concerns about reliability, as Auto ARIMA tends to overlook instances of significant lag correlation, as evidenced by the ACF/PACF plots. Instead, it prioritizes minimizing AIC/BIC values without considering the full spectrum of model dynamics. This narrow focus risks recommending a model prone to overfitting, lacking in the comprehensive assessment necessary for accurate forecasting.
The best model from the step above was ARIMA(0,1,6), while the best model Auto ARIMA gave me is ARIMA(0,1,1) with drift. This discrepancy raises concerns about reliability, as Auto ARIMA tends to overlook instances of significant lag correlation, as evidenced by the ACF/PACF plots. Instead, it prioritizes minimizing AIC/BIC values without considering the full spectrum of model dynamics. This narrow focus risks recommending a model prone to overfitting, lacking in the comprehensive assessment necessary for accurate forecasting.
The best model from the step above and from Auto ARIMA was ARIMA(0,1,1) in both cases, confirming it as the best model.
Code
auto.arima(diff(hos_ts2))
Series: diff(hos_ts2)
ARIMA(0,0,3) with zero mean
Coefficients:
ma1 ma2 ma3
0.0996 -0.4661 -0.2804
s.e. 0.1375 0.1643 0.1693
sigma^2 = 462716317: log likelihood = -545.8
AIC=1099.61 AICc=1100.54 BIC=1107.09
The best model from the step above was ARIMA(2,1,1), while the best model Auto ARIMA gave me is ARIMA(0,0,3) with zero mean. This discrepancy raises concerns about reliability, as Auto ARIMA tends to overlook instances of significant lag correlation, as evidenced by the ACF/PACF plots. Instead, it prioritizes minimizing AIC/BIC values without considering the full spectrum of model dynamics. This narrow focus risks recommending a model prone to overfitting, lacking in the comprehensive assessment necessary for accurate forecasting.
Code
auto.arima(diff(hos_ts3))
Series: diff(hos_ts3)
ARIMA(0,0,3) with zero mean
Coefficients:
ma1 ma2 ma3
0.1479 -0.5111 -0.3791
s.e. 0.1330 0.1547 0.1596
sigma^2 = 0.0005008: log likelihood = 115.25
AIC=-222.51 AICc=-221.58 BIC=-215.02
The best model from the step above was ARIMA(2,0,0), while the best model Auto ARIMA gave me is ARIMA(0,0,3) with zero mean. This discrepancy raises concerns about reliability, as Auto ARIMA tends to overlook instances of significant lag correlation, as evidenced by the ACF/PACF plots. Instead, it prioritizes minimizing AIC/BIC values without considering the full spectrum of model dynamics. This narrow focus risks recommending a model prone to overfitting, lacking in the comprehensive assessment necessary for accurate forecasting.
Code
auto.arima(diff(diff(unemploy_ts)))
Series: diff(diff(unemploy_ts))
ARIMA(0,0,0) with zero mean
sigma^2 = 0.000934: log likelihood = 51.73
AIC=-101.45 AICc=-101.28 BIC=-100.23
The best model from the step above was ARIMA(1,0,1), while the best model Auto ARIMA gave me is ARIMA(0,0,0) with zero mean. This discrepancy raises concerns about reliability, as Auto ARIMA tends to overlook instances of significant lag correlation, as evidenced by the ACF/PACF plots. Instead, it prioritizes minimizing AIC/BIC values without considering the full spectrum of model dynamics. This narrow focus risks recommending a model prone to overfitting, lacking in the comprehensive assessment necessary for accurate forecasting.
Code
auto.arima(diff(stock_ts))
Series: diff(stock_ts)
ARIMA(2,0,0) with zero mean
Coefficients:
ar1 ar2
-0.2074 0.3307
s.e. 0.1500 0.1513
sigma^2 = 0.3424: log likelihood = -40.65
AIC=87.29 AICc=87.85 BIC=92.84
The best model from the step above was ARIMA(1,0,3), while the best model Auto ARIMA gave me is ARIMA(2,0,0) with zero mean. This discrepancy raises concerns about reliability, as Auto ARIMA tends to overlook instances of significant lag correlation, as evidenced by the ACF/PACF plots. Instead, it prioritizes minimizing AIC/BIC values without considering the full spectrum of model dynamics. This narrow focus risks recommending a model prone to overfitting, lacking in the comprehensive assessment necessary for accurate forecasting.
Code
auto.arima(diff(demo_ts))
Series: diff(demo_ts)
ARIMA(0,0,1) with zero mean
Coefficients:
ma1
-0.9552
s.e. 0.1012
sigma^2 = 0.0004485: log likelihood = 116.21
AIC=-228.43 AICc=-228.16 BIC=-224.69
The best model from the step above and from Auto ARIMA was ARIMA(0,0,1) in both cases, confirming it as the best model.
Code
auto.arima(diff(inde_ts))
Series: diff(inde_ts)
ARIMA(1,0,0) with zero mean
Coefficients:
ar1
-0.5722
s.e. 0.1176
sigma^2 = 0.001697: log likelihood = 85.29
AIC=-166.58 AICc=-166.31 BIC=-162.83
The best model from the step above was ARIMA(0,0,1), while the best model Auto ARIMA gave me is ARIMA(1,0,0) with zero mean. This discrepancy raises concerns about reliability, as Auto ARIMA tends to overlook instances of significant lag correlation, as evidenced by the ACF/PACF plots. Instead, it prioritizes minimizing AIC/BIC values without considering the full spectrum of model dynamics. This narrow focus risks recommending a model prone to overfitting, lacking in the comprehensive assessment necessary for accurate forecasting.
Code
auto.arima(diff(rep_ts))
Series: diff(rep_ts)
ARIMA(0,0,1) with zero mean
Coefficients:
ma1
-0.8013
s.e. 0.1445
sigma^2 = 0.0004739: log likelihood = 115.59
AIC=-227.18 AICc=-226.92 BIC=-223.44
The best model from the step above was ARIMA(1,0,1), while the best model Auto ARIMA gave me is ARIMA(0,0,1) with zero mean. This discrepancy raises concerns about reliability, as Auto ARIMA tends to overlook instances of significant lag correlation, as evidenced by the ACF/PACF plots. Instead, it prioritizes minimizing AIC/BIC values without considering the full spectrum of model dynamics. This narrow focus risks recommending a model prone to overfitting, lacking in the comprehensive assessment necessary for accurate forecasting.
Auto ARIMA may not always serve as the most dependable model for forecasting, for several reasons. Firstly, its reliance on predefined criteria for model selection can sometimes overlook subtle nuances within the data, which might be crucial for accurate predictions. Additionally, the automated nature of Auto ARIMA increases the risk of overfitting or selecting a model that is less than optimal. Thus, although Auto ARIMA is an incredibly potent analytical tool, it is essential to approach its projected results with caution and not rely on them unquestioningly.
In the presented forecast graphs, the predictive trajectory is depicted by a blue line, surrounded by a confidence band in two shades of purple. The darker purple represents the 95% confidence interval, indicating a high level of certainty, while the lighter purple corresponds to the 5% interval, denoting lower confidence levels. Notably, as the forecast extends into the future, the confidence band expands, signifying a widening interval. This expansion reflects an increase in forecast uncertainty—the further we project into the future, the more variable and less certain the predictions become. This pattern is consistently observed across all plots, underscoring the inherent challenge of forecasting over extended periods.
The plot showcases a comparison of different forecasting methods for daily vaccination numbers. The black line represents the actual historical data for daily vaccinations, displaying a sharp peak in early 2021 and another in early 2022, followed by a decline. The forecasts from ARIMA, Drift, Mean, and Naïve models are depicted as flat lines beyond the historical data, indicating their prediction for future values. The ARIMA forecast appears slightly above zero, suggesting minimal change in future vaccination numbers, which could imply a matured vaccination campaign. Drift and Mean models predict a very slight downward and upward trend, respectively, while the Naïve model, often used as a baseline comparison, suggests no change, simply carrying the last observed data point forward. The stability in these predictions may reflect an anticipation that vaccination rates will level off, having addressed the immediate demand.
The plot is a visual representation of the comparison between different forecasting methods for the number of people vaccinated over time. The historical data is shown by the black line, indicating the growth trend of vaccinations until the current period. The projections made by the ARIMA, Drift, Mean, and Naïve methods are depicted as flat lines extending from the last historical point into the future (2023 and beyond).
The ARIMA model predicts a slight increase in vaccination numbers, while the Drift method suggests a more optimistic steady rise. The Mean model forecasts a constant rate, and the Naïve method, which carries the last observed value forward, also indicates no change. It is clear that these models have varying degrees of optimism regarding the future trend of vaccination numbers, with ARIMA and Drift expecting growth, while Mean and Naïve forecasts imply stabilization.
This plot compares the forecasts of fully vaccinated individuals using several time series models. The black line represents the actual number of people fully vaccinated over time, showing an initial steep increase that plateaus as it moves into 2022. Predictions from the ARIMA, Drift, Mean, and Naïve models extend from the last data point. The ARIMA model projects a continued but slowing increase in fully vaccinated people, while the Drift model shows a steeper increase, suggesting higher future vaccination rates. The Mean model predicts a flat trend, indicating no significant change moving forward, and the Naïve model simply extends the last known value into the future, suggesting a static forecast. The models reflect different assumptions about the continuation of vaccination efforts and possible changes in public health policy or vaccine uptake.
The graph presents a forecast comparison for newly confirmed COVID-19 cases using various time series models. The historical data, illustrated by the black line, shows an increasing trend through 2020 and 2021, with a plateau into 2022. The forecast models—ARIMA, Drift, Mean, and Naïve—are indicated by different colored lines beyond the last historical data point. The ARIMA model predicts a steady upward trend, suggesting an increase in cases. In contrast, the Drift model shows a flat forecast, indicating little change. The Mean model also forecasts a constant trend, and the Naïve model projects a continuation of the last observed value. This graphical representation provides an outlook on potential future case trends based on different modeling approaches.
The plot illustrates a forecast comparison for COVID-19 death cases using various time series models. The black line represents the historical number of deaths, increasing initially and then plateauing into 2022. Forecasts by ARIMA, Drift, Mean, and Naïve models are shown as colored lines projecting beyond the last data point. The ARIMA model predicts an upward trend, possibly anticipating a rise in death cases. The Drift model’s forecast remains constant, while the Mean model suggests slight growth. The Naïve model extends the last value into the future, implying no immediate change. This analysis might help in understanding and preparing for potential future scenarios in public health planning.
The plot compares different time series forecasting methods for the number of inpatient hospital beds occupied over time. The black line indicates the actual historical data, which shows a rapid increase at the beginning, stabilizing as it moves into the latter part of 2021 and remains relatively flat into 2022. Forecasts by the ARIMA, Drift, Mean, and Naïve methods are represented as colored lines extending from the end of the actual data into future years. The ARIMA model shows an optimistic continuous increase in bed occupancy, while the Drift model forecasts a flat trend. The Mean model predicts a very slight increase, suggesting stability, and the Naïve model extends the last known value, implying no expected change. Each model’s prediction reflects different assumptions and interpretations of the historical data’s underlying patterns.
Code
# Compare benchmark forecasts (Mean, Naïve, Drift) against the specified
# ARIMA(2,1,1) model on the inpatient-bed series.
fit_techmu_bench <- Arima(hos_ts2, order = c(2, 1, 1), include.drift = FALSE)
autoplot(hos_ts2) +
  autolayer(meanf(hos_ts2, h = 10), series = "Mean", PI = FALSE) +
  autolayer(naive(hos_ts2, h = 10), series = "Naïve", PI = FALSE) +
  autolayer(rwf(hos_ts2, drift = TRUE, h = 10), series = "Drift", PI = FALSE) +
  # BUG FIX: forecast the fitted model, not the raw series --
  # forecast(hos_ts2, 10) ignored fit_techmu_bench and auto-selected a model.
  autolayer(forecast(fit_techmu_bench, h = 10), series = "Arima", PI = FALSE) +
  theme_bw() +
  ggtitle("Benchmark Methods Comparison with Inpatient Bed Number Used for COVID") +
  guides(colour = guide_legend(title = "Forecast"))
The plot illustrates a forecast comparison using different models for the number of inpatient hospital beds utilized for COVID-19 patients. The actual historical data, represented by the black line, shows several spikes, indicating surges in hospital bed usage at different times, presumably correlating with waves of the pandemic. Moving into the future, forecasts from ARIMA, Drift, Mean, and Naïve methods show diverging trends. ARIMA expects an increase, while Drift indicates a flat future trend. Mean and Naïve models suggest a slight increase and no change, respectively. The models likely reflect different assumptions about pandemic progression and healthcare needs.
Code
# Compare benchmark forecasts (Mean, Naïve, Drift) against the specified
# ARIMA(2,0,0) model on the inpatient-bed utilization-rate series.
fit_techmu_bench <- Arima(hos_ts3, order = c(2, 0, 0), include.drift = FALSE)
autoplot(hos_ts3) +
  autolayer(meanf(hos_ts3, h = 10), series = "Mean", PI = FALSE) +
  autolayer(naive(hos_ts3, h = 10), series = "Naïve", PI = FALSE) +
  autolayer(rwf(hos_ts3, drift = TRUE, h = 10), series = "Drift", PI = FALSE) +
  # BUG FIX: forecast the fitted model, not the raw series --
  # forecast(hos_ts3, 10) ignored fit_techmu_bench and auto-selected a model.
  autolayer(forecast(fit_techmu_bench, h = 10), series = "Arima", PI = FALSE) +
  theme_bw() +
  ggtitle("Benchmark Methods Comparison with Utilization Rate for Inpatient Bed Used for COVID") +
  guides(colour = guide_legend(title = "Forecast"))
This plot appears to be a time series forecast comparing different methods (ARIMA, Drift, Mean, Naive) for predicting the utilization rate of inpatient beds used for COVID. The historical data shows significant fluctuations, which could correspond to various waves or surges in COVID cases. The forecast section shows that while some methods predict stability or a decline, the ARIMA model suggests a potential increase in bed utilization, which might anticipate a rise in cases or a change in hospitalization rates. The other methods seem to predict a relatively flat or stable future trend.
This plot appears to be a graphical representation comparing different forecasting methods for unemployment rates over time. The black line represents historical data on unemployment rates. The colored lines, which represent forecasts from various methods such as ARIMA, Drift, Mean, and Naïve, start from where the historical data ends. The plot shows the unemployment rate sharply increasing in 2020, likely due to the COVID-19 pandemic, then gradually decreasing over time, indicating recovery. The ARIMA model forecast seems to indicate a slight increase in unemployment in the future, while the Drift, Mean, and Naïve forecasts suggest a stable or decreasing trend.
The plot shows the historical stock price for Pfizer, represented by the black line, along with forecasts from different models: ARIMA, Drift, Mean, and Naïve. The colored horizontal lines indicate the forecasted stock price level according to each model from the present to 2024. The ARIMA model forecasts a slight decline, while the Drift and Mean models predict a stabilization of the stock price. The Naïve forecast suggests a more significant decline. This visualization is used to compare how different statistical methods anticipate the stock price trend based on historical data.
Code
# Compare benchmark forecasts (Mean, Naïve, Drift) against the specified
# ARIMA(0,0,1) model on the Democratic support-rate series.
fit_techmu_bench <- Arima(demo_ts, order = c(0, 0, 1), include.drift = FALSE)
autoplot(demo_ts) +
  autolayer(meanf(demo_ts, h = 10), series = "Mean", PI = FALSE) +
  autolayer(naive(demo_ts, h = 10), series = "Naïve", PI = FALSE) +
  autolayer(rwf(demo_ts, drift = TRUE, h = 10), series = "Drift", PI = FALSE) +
  # BUG FIX: forecast the fitted model, not the raw series --
  # forecast(demo_ts, 10) ignored fit_techmu_bench and auto-selected a model.
  autolayer(forecast(fit_techmu_bench, h = 10), series = "Arima", PI = FALSE) +
  theme_bw() +
  ggtitle("Benchmark Methods Comparison with Support Rate for Democratic") +
  guides(colour = guide_legend(title = "Forecast"))
The uploaded plot appears to compare the performance of various forecasting methods—Arima, Drift, Mean, and Naïve—on a particular dataset over time. The solid black line likely represents actual historical data, and the horizontal colored lines project future predictions according to each method. These predictions might illustrate expected trends or levels for a variable such as support rates for a political party, stock prices, healthcare metrics, or economic indicators. The Arima forecast line suggests changes over time, while Drift, Mean, and Naïve methods seem to predict a constant future value, likely based on different statistical assumptions or calculations. This visualization helps to evaluate the different approaches to forecasting and their potential accuracy in predicting future trends or values.
Code
# Compare benchmark forecasts (Mean, Naïve, Drift) against the specified
# ARIMA(0,0,1) model on the Independent support-rate series.
fit_techmu_bench <- Arima(inde_ts, order = c(0, 0, 1), include.drift = FALSE)
autoplot(inde_ts) +
  autolayer(meanf(inde_ts, h = 10), series = "Mean", PI = FALSE) +
  autolayer(naive(inde_ts, h = 10), series = "Naïve", PI = FALSE) +
  autolayer(rwf(inde_ts, drift = TRUE, h = 10), series = "Drift", PI = FALSE) +
  # BUG FIX: forecast the fitted model, not the raw series --
  # forecast(inde_ts, 10) ignored fit_techmu_bench and auto-selected a model.
  autolayer(forecast(fit_techmu_bench, h = 10), series = "Arima", PI = FALSE) +
  theme_bw() +
  ggtitle("Benchmark Methods Comparison with Support Rate for Independent") +
  guides(colour = guide_legend(title = "Forecast"))
The plot you’ve shared appears to depict a time series analysis using various benchmark methods such as ARIMA, Drift, Mean, and Naïve to forecast future values related to a specific metric. The actual historical data is shown by the solid black line, which seems to have a particular trend or pattern. The forecast lines for each method start from where the actual data ends and project into the future, displaying the predicted values according to each method.
ARIMA is showing a distinct upward or downward trend, suggesting a specific model-based prediction. The Drift method appears to forecast a linear trend that picks up from the last observed point. The Mean forecast suggests that future values will hover around the historical average, while the Naïve method seems to project that future values will remain constant at the last observed value. Each method offers a different perspective on future expectations based on past data.
Code
# Compare benchmark forecasts (Mean, Naïve, Drift) against the specified
# ARIMA(1,0,1) model on the Republican support-rate series.
fit_techmu_bench <- Arima(rep_ts, order = c(1, 0, 1), include.drift = FALSE)
autoplot(rep_ts) +
  autolayer(meanf(rep_ts, h = 10), series = "Mean", PI = FALSE) +
  autolayer(naive(rep_ts, h = 10), series = "Naïve", PI = FALSE) +
  autolayer(rwf(rep_ts, drift = TRUE, h = 10), series = "Drift", PI = FALSE) +
  # BUG FIX: forecast the fitted model, not the raw series --
  # forecast(rep_ts, 10) ignored fit_techmu_bench and auto-selected a model.
  autolayer(forecast(fit_techmu_bench, h = 10), series = "Arima", PI = FALSE) +
  theme_bw() +
  ggtitle("Benchmark Methods Comparison with Support Rate for Republican") +
  guides(colour = guide_legend(title = "Forecast"))
This plot compares benchmark forecasting methods (ARIMA, Drift, Mean, and Naïve) for the Republican support rate. The black line shows the historical support rate, and the colored lines extending from the last observation show each method's projection into the future.
You can observe from the seasonal differenced dataset that the time series plot looks significantly different. The seasonal cycles are removed, revealing the true trend of the data. Looking at the ACF and PACF plots, we can determine that:
P: 1,2 D: 1 Q: 1
Code
# ACF/PACF diagnostics of the seasonally differenced (lag-12) series
# of people vaccinated per hundred.
ggtsdisplay(diff(p_vacc_ts, lag = 12))
You can observe that there is no obvious seasonal difference in the dataset. The seasonal cycles are removed, revealing the true trend of the data. Looking at the ACF and PACF plots, we can determine that:
P: 1 D: 1,2 Q: 1
Code
# ACF/PACF diagnostics of the seasonally differenced (lag-12) series
# of people fully vaccinated per hundred.
ggtsdisplay(diff(pf_vacc_ts, lag = 12))
You can observe that there is no obvious seasonal difference in the dataset. The seasonal cycles are removed, revealing the true trend of the data. Looking at the ACF and PACF plots, we can determine that:
P: 1 D: 1,2 Q: 1
Code
# ACF/PACF diagnostics of the seasonally differenced (lag-12)
# confirmed-cases series.
ggtsdisplay(diff(case_ts, lag = 12))
You can observe that there is no obvious seasonal difference in the dataset. The seasonal cycles are removed, revealing the true trend of the data. Looking at the ACF and PACF plots, we can determine that:
P: 1 D: 1,2,3,4 Q: 1
Code
# ACF/PACF diagnostics of the seasonally differenced (lag-12)
# death-cases series.
ggtsdisplay(diff(dead_ts, lag = 12))
You can observe that there is no obvious seasonal difference in the dataset. The seasonal cycles are removed, revealing the true trend of the data. Looking at the ACF and PACF plots, we can determine that:
P: 1 D: 1,2,3,4,5 Q: 1
Code
# ACF/PACF diagnostics of the seasonally differenced (lag-12)
# inpatient-beds series.
ggtsdisplay(diff(hos_ts1, lag = 12))
You can observe that there is no obvious seasonal difference in the dataset. The seasonal cycles are removed, revealing the true trend of the data. Looking at the ACF and PACF plots, we can determine that:
P: 1 D: 1,2 Q: 1
Code
# ACF/PACF diagnostics of the seasonally differenced (lag-12)
# COVID inpatient-bed-usage series.
ggtsdisplay(diff(hos_ts2, lag = 12))
You can observe from the seasonal differenced dataset that the time series plot looks significantly different. The seasonal cycles are removed, revealing the true trend of the data. Looking at the ACF and PACF plots, we can determine that:
P: 1 D: 1 Q: 1
Code
# ACF/PACF diagnostics of the seasonally differenced (lag-12)
# bed-utilization-rate series.
ggtsdisplay(diff(hos_ts3, lag = 12))
You can observe from the seasonal differenced dataset that the time series plot looks significantly different. The seasonal cycles are removed, revealing the true trend of the data. Looking at the ACF and PACF plots, we can determine that:
P: 1,2 D: 1 Q: 1
Code
# ACF/PACF diagnostics of the seasonally differenced (lag-12)
# unemployment-rate series.
ggtsdisplay(diff(unemploy_ts, lag = 12))
You can observe that there is no obvious seasonal difference in the dataset. The seasonal cycles are removed, revealing the true trend of the data. Looking at the ACF and PACF plots, we can determine that:
P: 1 D: 1 Q: 1
Code
# ACF/PACF diagnostics of the seasonally differenced (lag-12)
# Pfizer stock-price series.
ggtsdisplay(diff(stock_ts, lag = 12))
You can observe from the seasonal differenced dataset that the time series plot looks significantly different. The seasonal cycles are removed, revealing the true trend of the data. Looking at the ACF and PACF plots, we can determine that:
P: 1 D: 1,2 Q: 1
Code
# ACF/PACF diagnostics of the seasonally differenced (lag-12)
# Democratic support-rate series.
ggtsdisplay(diff(demo_ts, lag = 12))
You can observe from the seasonal differenced dataset that the time series plot looks significantly different. The seasonal cycles are removed, revealing the true trend of the data. Looking at the ACF and PACF plots, we can determine that:
P: 1 D: 1 Q: 1
Code
# ACF/PACF diagnostics of the seasonally differenced (lag-12)
# Independent support-rate series.
ggtsdisplay(diff(inde_ts, lag = 12))
You can observe from the seasonal differenced dataset that the time series plot looks significantly different. The seasonal cycles are removed, revealing the true trend of the data. Looking at the ACF and PACF plots, we can determine that:
P: 1 D: 1 Q: 1
Code
# ACF/PACF diagnostics of the seasonally differenced (lag-12)
# Republican support-rate series.
ggtsdisplay(diff(rep_ts, lag = 12))
You can observe from the seasonal differenced dataset that the time series plot looks significantly different. The seasonal cycles are removed, revealing the true trend of the data. Looking at the ACF and PACF plots, we can determine that:
p d q P D Q AIC BIC AICc
25 3 1 0 0 1 0 68.91017 72.00053 72.54654
Code
# Candidate-model row with the smallest BIC.
best_bic <- which.min(output[["BIC"]])
output[best_bic, ]
p d q P D Q AIC BIC AICc
21 2 1 0 0 1 0 69.08627 71.40404 71.08627
Code
# Candidate-model row with the smallest AICc.
best_aicc <- which.min(output[["AICc"]])
output[best_aicc, ]
p d q P D Q AIC BIC AICc
21 2 1 0 0 1 0 69.08627 71.40404 71.08627
The model with the lowest AIC, BIC, and AICc is ARIMA(2,1,0)x(0,1,0)12.
Due to the presence of non-stationary seasonality in this time series data, we have opted to discontinue its use. Non-stationary seasonality implies that the patterns and trends within the data exhibit variations over time, without displaying a consistent and predictable behavior. As a result, attempting to model or analyze such data may lead to unreliable or inaccurate outcomes. Hence, to ensure the robustness and validity of our analyses, we have decided to cease utilizing this particular time series.
p d q P D Q AIC BIC AICc
10 0 1 2 0 1 1 -138.2548 -132.0334 -136.9214
Code
# Candidate-model row with the smallest BIC.
best_bic <- which.min(output[["BIC"]])
output[best_bic, ]
p d q P D Q AIC BIC AICc
10 0 1 2 0 1 1 -138.2548 -132.0334 -136.9214
Code
# Candidate-model row with the smallest AICc.
best_aicc <- which.min(output[["AICc"]])
output[best_aicc, ]
p d q P D Q AIC BIC AICc
10 0 1 2 0 1 1 -138.2548 -132.0334 -136.9214
The model with the lowest AIC, BIC, and AICc is ARIMA(0,1,2)x(0,1,1)12.
Due to the presence of non-stationary seasonality in this time series data, we have opted to discontinue its use. Non-stationary seasonality implies that the patterns and trends within the data exhibit variations over time, without displaying a consistent and predictable behavior. As a result, attempting to model or analyze such data may lead to unreliable or inaccurate outcomes. Hence, to ensure the robustness and validity of our analyses, we have decided to cease utilizing this particular time series.
p d q P D Q AIC BIC AICc
10 0 1 2 0 1 1 -152.7731 -146.439 -151.4828
Code
# Candidate-model row with the smallest BIC.
best_bic <- which.min(output[["BIC"]])
output[best_bic, ]
p d q P D Q AIC BIC AICc
10 0 1 2 0 1 1 -152.7731 -146.439 -151.4828
Code
# Candidate-model row with the smallest AICc.
best_aicc <- which.min(output[["AICc"]])
output[best_aicc, ]
p d q P D Q AIC BIC AICc
10 0 1 2 0 1 1 -152.7731 -146.439 -151.4828
The model with the lowest AIC, BIC, and AICc is ARIMA(0,1,2)x(0,1,1)12.
12. Fitting Best SARIMA(p,d,q) & Diagnostics
Due to the limitation where the number of lags exceeds the available number of observations in some of the time series data, we've adjusted these SARIMA models to utilize 4 lags instead of the initially intended 12. This adaptation ensures that the models remain feasible and effectively capture the temporal dependencies within the data, albeit with a reduced lag length. While this modification may slightly alter the models' predictive capacity, it allows us to derive meaningful insights and forecasts while circumventing the constraint posed by the insufficient number of observations.
converged
<><><><><><><><><><><><><><>
Coefficients:
Estimate SE t.value p.value
ma1 -0.9153 0.2663 -3.4378 0.0023
sigma^2 estimated as 8164902333 on 22 degrees of freedom
AIC = 25.91335 AICc = 25.92164 BIC = 26.01209
The Standard Residual Plot appears good, displaying stationarity with a nearly constant mean and variation.
The Autocorrelation Function (ACF) Plot shows almost no correlation indicating that the model has harnessed everything and all that is left is white noise. This indicates a good model fit.
The Quantile-Quantile (Q-Q) Plot demonstrates near-normality.
The Ljung-Box test results reveal values above the 0.05 (5% significance) threshold, indicating a good fit.
$ttable: all coefficients are significant.
The equation for the model is: \[x_t = w_t -0.9153w_{t-1} \]
converged
<><><><><><><><><><><><><><>
Coefficients:
Estimate SE t.value p.value
ar1 0.4289 0.2617 1.6388 0.1161
ma1 0.9434 0.2609 3.6161 0.0016
ma2 0.3449 0.3130 1.1018 0.2830
sigma^2 estimated as 2.270531 on 21 degrees of freedom
AIC = 4.070457 AICc = 4.120457 BIC = 4.266799
The Standard Residual Plot appears good, displaying stationarity with a nearly constant mean and variation.
The Autocorrelation Function (ACF) Plot shows almost no correlation indicating that the model has harnessed everything and all that is left is white noise. This indicates a good model fit.
The Quantile-Quantile (Q-Q) Plot demonstrates near-normality.
The Ljung-Box test results reveal values above the 0.05 (5% significance) threshold, indicating a good fit.
$ttable: only ma1 is significant.
The equation for the model is: \[x_t = 0.4289x_{t-1} + w_t + 0.9434w_{t-1} + 0.3449w_{t-2}\]
converged
<><><><><><><><><><><><><><>
Coefficients:
Estimate SE t.value p.value
ar1 1.4015 0.1538 9.1117 0
ar2 -0.8044 0.1419 -5.6687 0
sigma^2 estimated as 2.090858 on 22 degrees of freedom
AIC = 3.950764 AICc = 3.974574 BIC = 4.098021
The Standard Residual Plot appears good, displaying stationarity with a nearly constant mean and variation.
The Autocorrelation Function (ACF) Plot shows almost no correlation indicating that the model has harnessed everything and all that is left is white noise. This indicates a good model fit.
The Quantile-Quantile (Q-Q) Plot demonstrates near-normality.
The Ljung-Box test results reveal values above the 0.05 (5% significance) threshold, indicating a good fit.
$ttable: all coefficients are significant.
The equation for the model is: \[x_t = 1.4015x_{t-1} - 0.8044x_{t-2} + w_t\]
converged
<><><><><><><><><><><><><><>
Coefficients:
Estimate SE t.value p.value
ma1 1.8515 0.2207 8.3910 0.0000
ma2 0.9997 0.2351 4.2512 0.0002
sma1 -0.9977 0.5750 -1.7352 0.0941
sigma^2 estimated as 85591993 on 27 degrees of freedom
AIC = 22.10196 AICc = 22.13273 BIC = 22.28878
The Standard Residual Plot appears good, displaying stationarity with a nearly constant mean and variation.
The Autocorrelation Function (ACF) Plot shows almost no correlation indicating that the model has harnessed everything and all that is left is white noise. This indicates a good model fit.
The Quantile-Quantile (Q-Q) Plot demonstrates near-normality.
The Ljung-Box test results reveal values above the 0.05 (5% significance) threshold, indicating a good fit.
$ttable: ma1 and ma2 are significant.
The equation for the model is: \[x_t = w_t + 1.8515w_{t-1} + 0.9997w_{t-2} - 0.9977w_{t-12}\] (seasonal–nonseasonal cross terms omitted).
converged
<><><><><><><><><><><><><><>
Coefficients:
Estimate SE t.value p.value
ar1 -0.4824 0.1197 -4.0287 0.0003
ar2 -0.4614 0.1364 -3.3828 0.0019
ar3 -0.8923 0.0828 -10.7763 0.0000
sigma^2 estimated as 2912500402 on 32 degrees of freedom
AIC = 24.99912 AICc = 25.02124 BIC = 25.17687
The Standard Residual Plot appears good, displaying stationarity with a nearly constant mean and variation.
The Autocorrelation Function (ACF) Plot shows almost no correlation indicating that the model has harnessed everything and all that is left is white noise. This indicates a good model fit.
The Quantile-Quantile (Q-Q) Plot demonstrates near-normality.
The Ljung-Box test results reveal values above the 0.05 (5% significance) threshold, indicating a good fit.
$ttable: all coefficients are significant.
The equation for the model is: \[x_t = -0.4824x_{t-1} - 0.4614x_{t-2} - 0.8923x_{t-3} + w_t\]
converged
<><><><><><><><><><><><><><>
Coefficients:
Estimate SE t.value p.value
ma1 -0.5988 0.2210 -2.7100 0.0107
ma2 -0.4011 0.1836 -2.1847 0.0363
sma1 -0.9994 0.3640 -2.7454 0.0098
sigma^2 estimated as 416745453 on 32 degrees of freedom
AIC = 23.49261 AICc = 23.51473 BIC = 23.67037
The Standard Residual Plot appears good, displaying stationarity with a nearly constant mean and variation.
The Autocorrelation Function (ACF) Plot shows almost no correlation indicating that the model has harnessed everything and all that is left is white noise. This indicates a good model fit.
The Quantile-Quantile (Q-Q) Plot demonstrates near-normality.
The Ljung-Box test results reveal values above the 0.05 (5% significance) threshold, indicating a good fit.
$ttable: all coefficients are significant.
The equation for the model is: \[x_t = w_t - 0.5988w_{t-1} - 0.4011w_{t-2} - 0.9994w_{t-12}\] (seasonal–nonseasonal cross terms omitted).
converged
<><><><><><><><><><><><><><>
Coefficients:
Estimate SE t.value p.value
ma1 -0.4717 0.2009 -2.3476 0.0252
ma2 -0.5283 0.1702 -3.1046 0.0040
sma1 -0.9997 0.3681 -2.7156 0.0106
sigma^2 estimated as 0.0005031713 on 32 degrees of freedom
AIC = -3.950136 AICc = -3.928016 BIC = -3.772382
The Standard Residual Plot appears good, displaying stationarity with a nearly constant mean and variation.
The Autocorrelation Function (ACF) Plot shows almost no correlation indicating that the model has harnessed everything and all that is left is white noise. This indicates a good model fit.
The Quantile-Quantile (Q-Q) Plot demonstrates near-normality.
The Ljung-Box test results reveal values above the 0.05 (5% significance) threshold, indicating a good fit.
$ttable: all coefficients are significant.
The equation for the model is: \[x_t = w_t - 0.4717w_{t-1} - 0.5283w_{t-2} - 0.9997w_{t-12}\] (seasonal–nonseasonal cross terms omitted).
<><><><><><><><><><><><><><>
Coefficients:
Estimate p.value
sigma^2 estimated as 0.5572542 on 35 degrees of freedom
AIC = 2.310286 AICc = 2.310286 BIC = 2.354725
The Standard Residual Plot appears good, displaying stationarity with a nearly constant mean and variation.
The Autocorrelation Function (ACF) Plot shows almost no correlation indicating that the model has harnessed everything and all that is left is white noise. This indicates a good model fit.
The Quantile-Quantile (Q-Q) Plot demonstrates near-normality.
The Ljung-Box test results reveal values above the 0.05 (5% significance) threshold, indicating a good fit.
converged
<><><><><><><><><><><><><><>
Coefficients:
Estimate SE t.value p.value
ma1 -0.8685 0.1540 -5.6389 0.0000
sar1 -0.5087 0.1495 -3.4021 0.0017
sigma^2 estimated as 0.0006502313 on 34 degrees of freedom
AIC = -4.189884 AICc = -4.179783 BIC = -4.057924
The Standard Residual Plot appears good, displaying stationarity with a nearly constant mean and variation.
The Autocorrelation Function (ACF) Plot shows almost no correlation indicating that the model has harnessed everything and all that is left is white noise. This indicates a good model fit.
The Quantile-Quantile (Q-Q) Plot demonstrates near-normality.
The Ljung-Box test results reveal values above the 0.05 (5% significance) threshold, indicating a good fit.
The equation for the model is: \[x_t = -0.5087x_{t-12} + w_t - 0.8685w_{t-1}\] (seasonal–nonseasonal cross terms omitted).
converged
<><><><><><><><><><><><><><>
Coefficients:
Estimate SE t.value p.value
ma1 -0.9999 0.2161 -4.6278 0.0001
sma1 -0.9998 0.6468 -1.5458 0.1314
sigma^2 estimated as 0.001547766 on 34 degrees of freedom
AIC = -2.882414 AICc = -2.872313 BIC = -2.750454
The Standard Residual Plot appears good, displaying stationarity with a nearly constant mean and variation.
The Autocorrelation Function (ACF) Plot shows almost no correlation indicating that the model has harnessed everything and all that is left is white noise. This indicates a good model fit.
The Quantile-Quantile (Q-Q) Plot demonstrates near-normality.
The Ljung-Box test results reveal values above the 0.05 (5% significance) threshold, indicating a good fit.
$ttable: only ma1 is significant.
The equation for the model is: \[x_t = w_t - 0.9999w_{t-1} - 0.9998w_{t-12}\] (seasonal–nonseasonal cross terms omitted).
Coefficients:
Estimate SE t.value p.value
ma1 -0.4338 0.1777 -2.4408 0.0202
ma2 -0.4032 0.1828 -2.2057 0.0345
sma1 -0.9999 0.5380 -1.8587 0.0720
sigma^2 estimated as 0.0004077668 on 33 degrees of freedom
AIC = -4.243697 AICc = -4.222864 BIC = -4.06775
The Standard Residual Plot appears good, displaying stationarity with a nearly constant mean and variation.
The Autocorrelation Function (ACF) Plot shows almost no correlation indicating that the model has harnessed everything and all that is left is white noise. This indicates a good model fit.
The Quantile-Quantile (Q-Q) Plot demonstrates near-normality.
The Ljung-Box test results reveal values above the 0.05 (5% significance) threshold, indicating a good fit.
$ttable: all coefficients are significant.
The equation for the model is: \[x_t = w_t - 0.4338w_{t-1} - 0.4032w_{t-2} - 0.9999w_{t-12}\] (seasonal–nonseasonal cross terms omitted).
# ARIMA(0,1,1)(0,1,0)[12] on the twice-differenced daily-vaccination series;
# plot a 36-period (three-year) forecast. Renamed local from `time`, which
# masks stats::time().
fit_d_vacc <- Arima(diff(diff(d_vacc_ts)), order = c(0, 1, 1), seasonal = c(0, 1, 0))
autoplot(forecast(fit_d_vacc, h = 36)) +
  theme_bw() +
  theme(
    plot.background = element_rect(fill = "#D9E3F1", color = NA),
    panel.background = element_rect(fill = "#D9E3F1", color = NA)
  )
Code
# ARIMA(1,1,2)(0,1,0)[12] on people vaccinated per hundred;
# plot a 36-period (three-year) forecast. Renamed local from `time`,
# which masks stats::time().
fit_p_vacc <- Arima(p_vacc_ts, order = c(1, 1, 2), seasonal = c(0, 1, 0))
autoplot(forecast(fit_p_vacc, h = 36)) +
  theme_bw() +
  theme(
    plot.background = element_rect(fill = "#D9E3F1", color = NA),
    panel.background = element_rect(fill = "#D9E3F1", color = NA)
  )
Code
# ARIMA(2,1,0)(0,1,0)[12] on people fully vaccinated per hundred;
# plot a 36-period (three-year) forecast. Renamed local from `time`,
# which masks stats::time().
fit_pf_vacc <- Arima(pf_vacc_ts, order = c(2, 1, 0), seasonal = c(0, 1, 0))
autoplot(forecast(fit_pf_vacc, h = 36)) +
  theme_bw() +
  theme(
    plot.background = element_rect(fill = "#D9E3F1", color = NA),
    panel.background = element_rect(fill = "#D9E3F1", color = NA)
  )
Code
# ARIMA(0,1,2)(0,1,1)[12] on the death-cases series;
# plot a 36-period (three-year) forecast. Renamed local from `time`,
# which masks stats::time().
fit_dead <- Arima(dead_ts, order = c(0, 1, 2), seasonal = c(0, 1, 1))
autoplot(forecast(fit_dead, h = 36)) +
  theme_bw() +
  theme(
    plot.background = element_rect(fill = "#D9E3F1", color = NA),
    panel.background = element_rect(fill = "#D9E3F1", color = NA)
  )
Code
# ARIMA(3,1,0)(0,1,0)[12] on the differenced inpatient-beds series;
# plot a 36-period (three-year) forecast. Renamed local from `time`,
# which masks stats::time().
fit_hos1 <- Arima(diff(hos_ts1), order = c(3, 1, 0), seasonal = c(0, 1, 0))
autoplot(forecast(fit_hos1, h = 36)) +
  theme_bw() +
  theme(
    plot.background = element_rect(fill = "#D9E3F1", color = NA),
    panel.background = element_rect(fill = "#D9E3F1", color = NA)
  )
Code
# ARIMA(0,1,2)(0,1,1)[12] on the differenced COVID inpatient-bed series;
# plot a 36-period (three-year) forecast. Renamed local from `time`,
# which masks stats::time().
fit_hos2 <- Arima(diff(hos_ts2), order = c(0, 1, 2), seasonal = c(0, 1, 1))
autoplot(forecast(fit_hos2, h = 36)) +
  theme_bw() +
  theme(
    plot.background = element_rect(fill = "#D9E3F1", color = NA),
    panel.background = element_rect(fill = "#D9E3F1", color = NA)
  )
Code
# ARIMA(0,1,2)(0,1,1)[12] on the differenced bed-utilization-rate series;
# plot a 36-period (three-year) forecast. Renamed local from `time`,
# which masks stats::time().
fit_hos3 <- Arima(diff(hos_ts3), order = c(0, 1, 2), seasonal = c(0, 1, 1))
autoplot(forecast(fit_hos3, h = 36)) +
  theme_bw() +
  theme(
    plot.background = element_rect(fill = "#D9E3F1", color = NA),
    panel.background = element_rect(fill = "#D9E3F1", color = NA)
  )
Code
# ARIMA(0,1,0)(0,1,0)[12] on the Pfizer stock-price series;
# plot a 36-period (three-year) forecast. Renamed local from `time`,
# which masks stats::time().
fit_stock <- Arima(stock_ts, order = c(0, 1, 0), seasonal = c(0, 1, 0))
autoplot(forecast(fit_stock, h = 36)) +
  theme_bw() +
  theme(
    plot.background = element_rect(fill = "#D9E3F1", color = NA),
    panel.background = element_rect(fill = "#D9E3F1", color = NA)
  )
Code
# ARIMA(0,1,1)(1,1,0)[12] on the Democratic support-rate series;
# plot a 36-period (three-year) forecast. Renamed local from `time`,
# which masks stats::time().
fit_demo <- Arima(demo_ts, order = c(0, 1, 1), seasonal = c(1, 1, 0))
autoplot(forecast(fit_demo, h = 36)) +
  theme_bw() +
  theme(
    plot.background = element_rect(fill = "#D9E3F1", color = NA),
    panel.background = element_rect(fill = "#D9E3F1", color = NA)
  )
Code
# ARIMA(0,1,1)(0,1,1)[12] on the Independent support-rate series;
# plot a 36-period (three-year) forecast. Renamed local from `time`,
# which masks stats::time().
fit_inde <- Arima(inde_ts, order = c(0, 1, 1), seasonal = c(0, 1, 1))
autoplot(forecast(fit_inde, h = 36)) +
  theme_bw() +
  theme(
    plot.background = element_rect(fill = "#D9E3F1", color = NA),
    panel.background = element_rect(fill = "#D9E3F1", color = NA)
  )
Code
# ARIMA(0,1,2)(0,1,1)[12] on the Republican support-rate series;
# plot a 36-period (three-year) forecast. Renamed local from `time`,
# which masks stats::time().
fit_rep <- Arima(rep_ts, order = c(0, 1, 2), seasonal = c(0, 1, 1))
autoplot(forecast(fit_rep, h = 36)) +
  theme_bw() +
  theme(
    plot.background = element_rect(fill = "#D9E3F1", color = NA),
    panel.background = element_rect(fill = "#D9E3F1", color = NA)
  )
The plot you’ve uploaded appears to be a time series chart comparing different forecasting methods against actual data. The actual data is represented by the black line, and it shows daily vaccinations up to a certain point in time. The colored lines represent forecasts from different models, including ARIMA, Drift, Mean, and Naïve methods, projected beyond the actual data into future dates. Each forecast method predicts a different outcome for future vaccination numbers, with the ARIMA model suggesting a continued steady rate, while other models predict various levels of change or stability. Unfortunately, I can’t display the plot here, but I can provide descriptions or summaries of visual data when you upload it.
This plot appears to be comparing the forecasted number of people vaccinated using different benchmark methods against actual historical data. The black line represents the historical data of people vaccinated over time. The different colored lines at the end of the historical data represent forecasts made by different models for future vaccinations: Arima, Drift, Mean, and Naïve. The Arima model shows a sharp upward trend, suggesting a significant increase in vaccinations, while the other models forecast a relatively steady or only slightly increasing trend.
The plot appears to be a time series graph that shows data on the number of people fully vaccinated over time. The black line represents the actual historical data, showing a steady increase in the number of fully vaccinated individuals over time. The colored lines represent different forecasting methods projected into the future (2024 and beyond), such as ARIMA (Autoregressive Integrated Moving Average), Drift, Mean, and Naïve forecasting. Each method provides a different projection, indicating varying expectations about future vaccination trends based on past data.
The plot appears to show a comparison of different forecasting methods for a time series data set related to newly confirmed cases of a condition, likely COVID-19, given the context. The black line represents the actual historical data, while the various colored lines project into the future with forecasts from different methods. The ARIMA forecast (red) predicts an increase, while the Drift (green), Mean (blue), and Naïve (purple) methods predict a flat or slightly varied continuation of the most recent data. This type of visualization is used to compare the predictive performance of different statistical or machine learning models.
The plot is a comparison of forecast methods for death cases over time, with historical data shown by the black line increasing from 2020 through 2023. On the y-axis, death cases are plotted on a logarithmic scale, allowing for a wide range of values. Past 2023, four different forecast methods are shown: ARIMA (red line), suggesting a continuous increase; Drift (green line), indicating a more moderate increase; Mean (blue line), projecting a flat trend indicating no change from the last actual data point; and Naive (purple line), also predicting no change moving forward. These forecasts are meant to predict future values based on the historical trend and their respective statistical assumptions. The plot serves as a visual assessment tool to compare how each method anticipates the future based on the given data.
This graph illustrates the comparison of various forecasting methods applied to the number of inpatient beds over time. The historical data, plotted as a black line, shows a sharp increase in the number of beds around 2020, followed by a stabilization around 6e+05 (600,000 beds). Post-2023, the graph features predictions from different statistical models: ARIMA (red line) predicts a significant increase in bed numbers; Drift (green line) forecasts a slight upward trend; Mean (blue line) suggests the number will remain constant, equal to the historical average; and Naive (purple line) assumes no change, extending the last data point forward. The plot serves as a tool to visualize and evaluate how these models anticipate changes in hospital bed availability, with each model’s forecast based on its specific methodological approach to the existing data.
Code
# Compare benchmark forecasts (Mean, Naïve, Drift) against the specified
# ARIMA(2,1,1) model on the inpatient-bed series.
fit_techmu_bench <- Arima(hos_ts2, order = c(2, 1, 1), include.drift = FALSE)
autoplot(hos_ts2) +
  autolayer(meanf(hos_ts2, h = 10), series = "Mean", PI = FALSE) +
  autolayer(naive(hos_ts2, h = 10), series = "Naïve", PI = FALSE) +
  autolayer(rwf(hos_ts2, drift = TRUE, h = 10), series = "Drift", PI = FALSE) +
  # BUG FIX: forecast the fitted model, not the raw series --
  # forecast(hos_ts2, 10) ignored fit_techmu_bench and auto-selected a model.
  autolayer(forecast(fit_techmu_bench, h = 10), series = "Arima", PI = FALSE) +
  theme_bw() +
  ggtitle("Benchmark Methods Comparison with Inpatient Bed Number Used for COVID") +
  guides(colour = guide_legend(title = "Forecast"))
The plot depicts the number of inpatient hospital beds used for COVID-19 over time, showing a volatile history with several peaks, particularly notable in 2020 and 2021. The time series data, illustrated by the black line, exhibits sharp increases and decreases, suggesting waves or surges in hospital bed usage due to the pandemic. Looking into the future beyond the historical data, various forecasting methods have been applied, shown by the horizontal lines after 2023: ARIMA (red) forecasts a slight increase, Drift (green) indicates stability with a very mild upward trend, Mean (blue) predicts a constant number equal to the historical average, and Naive (purple) extends the last observed data point forward, assuming no change. These forecasts provide a range of potential future scenarios for hospital bed usage, reflecting the differing assumptions and calculations inherent to each forecasting model.
Code
# Fit a manually specified ARIMA(2,0,0) (no drift) to the COVID inpatient-bed
# utilization-rate series and compare its forecast against the benchmarks.
fit_techmu_bench <- Arima(hos_ts3, order = c(2, 0, 0), include.drift = FALSE)
autoplot(hos_ts3) +
  autolayer(meanf(hos_ts3, h = 10), series = "Mean", PI = FALSE) +
  autolayer(naive(hos_ts3, h = 10), series = "Naïve", PI = FALSE) +
  autolayer(rwf(hos_ts3, drift = TRUE, h = 10), series = "Drift", PI = FALSE) +
  # Forecast from the fitted model. The original passed the raw series, which
  # makes forecast() auto-select its own model and leaves fit_techmu_bench unused.
  autolayer(forecast(fit_techmu_bench, h = 10), series = "Arima", PI = FALSE) +
  theme_bw() +
  ggtitle("Benchmark Methods Comparison with Utilization Rate for Inpatient Bed Used for COVID") +
  guides(colour = guide_legend(title = "Forecast"))
This plot compares different forecasting methods for the utilization rate of inpatient beds used for COVID-19, as indicated by the historical data (black line) from 2020 to 2023. The y-axis represents the utilization rate, which shows significant fluctuations, peaking notably at various points likely corresponding to waves of COVID-19 cases. The forecast methods, represented by colored lines beyond 2023, provide different projections: ARIMA (red line) shows a slight increasing trend, Drift (green line) indicates a marginal increase, Mean (blue line) suggests a flat trend at the historical average, and Naive (purple line) extends the last observed data point, assuming the rate will remain unchanged. These models are used to predict future bed utilization, offering a visual tool for comparing the potential accuracy and assumptions of each method against future real-world data.
This plot displays the historical trend and forecasts of the unemployment rate from 2020 to beyond 2022. The black line represents the actual historical unemployment rate, which shows a sharp spike in 2020, followed by a general decline over the subsequent years. Looking to the future, the forecasts made by different models are represented by the horizontal lines: ARIMA (red line) predicts a downward trend, continuing the decline of the unemployment rate; Drift (green line) suggests a stable rate, maintaining the last observed rate; Mean (blue line) forecasts that the unemployment rate will average out to a steady state, ignoring the downward trend; and Naive (purple line) projects no change, extending the last observed point into the future. The plot serves to compare how these forecasting methods project the future of unemployment rates based on the past data and their respective algorithmic interpretations.
The plot shows the historical performance and future forecast of Pfizer’s stock price. The black line represents the stock price from 2020 through part of 2023, with the price experiencing volatility and an overall downward trend. Projected forecasts beyond the historical data are made using four methods: ARIMA (red line) predicts a continuing decline; Drift (green line) forecasts a slight increase; Mean (blue line) suggests the stock price will level off to the average of the historical data; and Naive (purple line) assumes the stock price will remain constant at the last observed value. These different forecasts highlight the variability in predicting stock prices depending on the modeling technique used, with each forecast method taking a unique approach to extrapolate future prices from the past data.
Code
# Fit a manually specified ARIMA(0,0,1) (no drift) to the Democratic support-rate
# series and compare its forecast against the Mean, Naive, and Drift benchmarks.
fit_techmu_bench <- Arima(demo_ts, order = c(0, 0, 1), include.drift = FALSE)
autoplot(demo_ts) +
  autolayer(meanf(demo_ts, h = 10), series = "Mean", PI = FALSE) +
  autolayer(naive(demo_ts, h = 10), series = "Naïve", PI = FALSE) +
  autolayer(rwf(demo_ts, drift = TRUE, h = 10), series = "Drift", PI = FALSE) +
  # Forecast from the fitted model. The original passed the raw series, which
  # makes forecast() auto-select its own model and leaves fit_techmu_bench unused.
  autolayer(forecast(fit_techmu_bench, h = 10), series = "Arima", PI = FALSE) +
  theme_bw() +
  ggtitle("Benchmark Methods Comparison with Support Rate for Democratic") +
  guides(colour = guide_legend(title = "Forecast"))
The graph illustrates the historical support rate for the Democratic Party (presumably in the United States) from 2020 to the latter part of 2023, along with projected forecasts using various methods. The support rate, shown by the black line, fluctuates over time with notable volatility but remains within a band between approximately 0.86 and 0.92. The future forecasts, indicated by the lines extending from the end of 2023 to 2025, predict the support rate using different statistical models: ARIMA (red) forecasts a stable support rate continuing from the last observed point; Drift (green) also suggests a stable but slightly declining trend; Mean (blue) projects that the support rate will level off to the historical mean, and Naive (purple) predicts no change, carrying the last observed support rate forward. These projections provide a range of scenarios for future party support, each based on different assumptions about the patterns in the historical data.
Code
# Fit a manually specified ARIMA(0,0,1) (no drift) to the Independent support-rate
# series and compare its forecast against the Mean, Naive, and Drift benchmarks.
fit_techmu_bench <- Arima(inde_ts, order = c(0, 0, 1), include.drift = FALSE)
autoplot(inde_ts) +
  autolayer(meanf(inde_ts, h = 10), series = "Mean", PI = FALSE) +
  autolayer(naive(inde_ts, h = 10), series = "Naïve", PI = FALSE) +
  autolayer(rwf(inde_ts, drift = TRUE, h = 10), series = "Drift", PI = FALSE) +
  # Forecast from the fitted model. The original passed the raw series, which
  # makes forecast() auto-select its own model and leaves fit_techmu_bench unused.
  autolayer(forecast(fit_techmu_bench, h = 10), series = "Arima", PI = FALSE) +
  theme_bw() +
  ggtitle("Benchmark Methods Comparison with Support Rate for Independent") +
  guides(colour = guide_legend(title = "Forecast"))
The plot displays the fluctuating support rate for Independents, presumably in a political context, from 2020 through 2023, and forecasts for this rate into 2025 using different statistical methods. The black line shows actual historical data, indicating that support for Independents has varied, with rates moving between just below 0.25 and around 0.35. Post-2023, the forecast lines suggest different future trends: the ARIMA model (red) predicts a very slight decline, the Drift method (green) suggests a constant rate with a small upward tendency, the Mean (blue) indicates a flat forecast at the historical average, and the Naive approach (purple) projects the rate will remain unchanged at the last observed point. These projections offer diverse perspectives on potential future support for Independents based on past patterns.
Code
# Fit a manually specified ARIMA(1,0,1) (no drift) to the Republican support-rate
# series and compare its forecast against the Mean, Naive, and Drift benchmarks.
fit_techmu_bench <- Arima(rep_ts, order = c(1, 0, 1), include.drift = FALSE)
autoplot(rep_ts) +
  autolayer(meanf(rep_ts, h = 10), series = "Mean", PI = FALSE) +
  autolayer(naive(rep_ts, h = 10), series = "Naïve", PI = FALSE) +
  autolayer(rwf(rep_ts, drift = TRUE, h = 10), series = "Drift", PI = FALSE) +
  # Forecast from the fitted model. The original passed the raw series, which
  # makes forecast() auto-select its own model and leaves fit_techmu_bench unused.
  autolayer(forecast(fit_techmu_bench, h = 10), series = "Arima", PI = FALSE) +
  theme_bw() +
  ggtitle("Benchmark Methods Comparison with Support Rate for Republican") +
  guides(colour = guide_legend(title = "Forecast"))
This plot presents the support rate for the Republican Party from 2020 through 2023 and includes projections to 2025 based on various forecasting models. The support rate, depicted by the black line, exhibits volatility with values oscillating primarily between 0.05 and 0.1. The forecast section post-2023 features predictions from different models: ARIMA (red line) suggests a decrease in support; Drift (green line) forecasts a small upward trend; Mean (blue line) indicates that support will stabilize at the historical average rate; and Naive (purple line) predicts the support rate will remain constant at the end of the observed data. These differing forecasts provide insights into possible future trends of Republican support, each based on distinct statistical assumptions and calculations.
15. Cross Validation
We performed cross validation to select the best-performing model for those time series with more than 40 samples. Since the auto.arima function did not identify a definitively best SARIMA model, we compare the performance of the model with the lowest AIC against the model with the second lowest AIC.
In the death case time series, we select model ARIMA(0,1,2)(0,1,1)[12] (lowest AIC) and model ARIMA(0,1,2)(1,1,0)[12] (second lowest AIC).
Code
# Rolling-origin cross validation on the death-case series:
# ARIMA(0,1,2)(0,1,1)[12] (lowest AIC) vs ARIMA(0,1,2)(1,1,0)[12] (second lowest),
# compared by mean absolute error at forecast horizons 1..12 over 2 yearly folds.
# n = length(dead_ts); n - k = 24 held-out observations -> 2 one-year folds.
k <- 19  # initial training window size in observations (was only in a comment;
         # the loop below errors without it being defined)
mae1 <- matrix(NA, 2, 12)
mae2 <- matrix(NA, 2, 12)
st <- tsp(dead_ts)[1] + (k - 1) / 12  # time of the k-th (last training) observation

for (i in 1:2) {
  xtrain <- window(dead_ts, end = st + i - 1)
  xtest <- window(dead_ts, start = st + (i - 1) + 1/12, end = st + i)

  # Model 1: lowest AIC
  fit <- Arima(xtrain, order = c(0, 1, 2),
               seasonal = list(order = c(0, 1, 1), period = 12),
               include.drift = TRUE, method = "ML")
  # Forecast the full 12-month test window. The original used h = 1, which
  # silently recycled one forecast across all 12 test points and made the
  # horizon-vs-MAE plot meaningless.
  fcast <- forecast(fit, h = 12)

  # Model 2: second lowest AIC
  fit2 <- Arima(xtrain, order = c(0, 1, 2),
                seasonal = list(order = c(1, 1, 0), period = 12),
                include.drift = TRUE, method = "ML")
  fcast2 <- forecast(fit2, h = 12)

  mae1[i, ] <- abs(fcast$mean - xtest)
  mae2[i, ] <- abs(fcast2$mean - xtest)
}

# Plot mean MAE per horizon for both candidates, with 30% headroom on the y-axis.
max_mae <- max(c(colMeans(mae1, na.rm = TRUE), colMeans(mae2, na.rm = TRUE)),
               na.rm = TRUE)
ylim_range <- c(0, max_mae + max_mae * 0.3)
plot(1:12, colMeans(mae1, na.rm = TRUE), type = "l", col = 2,
     xlab = "Horizon", ylab = "MAE", ylim = ylim_range)
lines(1:12, colMeans(mae2, na.rm = TRUE), type = "l", col = 3)
legend("topleft",
       legend = c("Model with lowest AIC", "Model with second lowest AIC"),
       col = 2:3, lty = 1)
We can see the model with the lowest AIC actually performs better!
In the inpatient bed time series, we select model ARIMA(3,1,0)(0,1,0)[12] (lowest AIC) and model ARIMA(1,1,1)(0,1,0)[12] (second lowest AIC).
Code
# Rolling-origin cross validation on the inpatient-bed series:
# ARIMA(3,1,0)(0,1,0)[12] (lowest AIC) vs ARIMA(1,1,1)(0,1,0)[12] (second lowest),
# compared by mean absolute error at forecast horizons 1..12 over 2 yearly folds.
# n = length(hos_ts1) = 49; n - k = 24 held-out observations -> 2 one-year folds.
k <- 25  # initial training window size in observations (was only in a comment;
         # the loop below errors without it being defined)
mae1 <- matrix(NA, 2, 12)
mae2 <- matrix(NA, 2, 12)
st <- tsp(hos_ts1)[1] + (k - 1) / 12  # time of the k-th (last training) observation

for (i in 1:2) {
  xtrain <- window(hos_ts1, end = st + i - 1)
  xtest <- window(hos_ts1, start = st + (i - 1) + 1/12, end = st + i)

  # Model 1: lowest AIC
  fit <- Arima(xtrain, order = c(3, 1, 0),
               seasonal = list(order = c(0, 1, 0), period = 12),
               include.drift = TRUE, method = "ML")
  # Forecast the full 12-month test window. The original used h = 1, which
  # silently recycled one forecast across all 12 test points and made the
  # horizon-vs-MAE plot meaningless.
  fcast <- forecast(fit, h = 12)

  # Model 2: second lowest AIC
  fit2 <- Arima(xtrain, order = c(1, 1, 1),
                seasonal = list(order = c(0, 1, 0), period = 12),
                include.drift = TRUE, method = "ML")
  fcast2 <- forecast(fit2, h = 12)

  mae1[i, ] <- abs(fcast$mean - xtest)
  mae2[i, ] <- abs(fcast2$mean - xtest)
}

# Plot mean MAE per horizon for both candidates, with 30% headroom on the y-axis.
max_mae <- max(c(colMeans(mae1, na.rm = TRUE), colMeans(mae2, na.rm = TRUE)),
               na.rm = TRUE)
ylim_range <- c(0, max_mae + max_mae * 0.3)
plot(1:12, colMeans(mae1, na.rm = TRUE), type = "l", col = 2,
     xlab = "Horizon", ylab = "MAE", ylim = ylim_range)
lines(1:12, colMeans(mae2, na.rm = TRUE), type = "l", col = 3)
legend("topleft",
       legend = c("Model with lowest AIC", "Model with second lowest AIC"),
       col = 2:3, lty = 1)
We can see the model with the second lowest AIC actually performs better!
In the inpatient bed used for COVID time series, we select model ARIMA(0,1,2)(0,1,1)[12] (lowest AIC) and model ARIMA(1,1,1)(0,1,1)[12] (second lowest AIC).
Code
# Rolling-origin cross validation on the COVID inpatient-bed series:
# ARIMA(0,1,2)(0,1,1)[12] (lowest AIC) vs ARIMA(1,1,1)(0,1,1)[12] (second lowest),
# compared by mean absolute error at forecast horizons 1..12 over 2 yearly folds.
# n = length(hos_ts2) = 49; n - k = 24 held-out observations -> 2 one-year folds.
k <- 25  # initial training window size in observations (was only in a comment;
         # the loop below errors without it being defined)
mae1 <- matrix(NA, 2, 12)
mae2 <- matrix(NA, 2, 12)
st <- tsp(hos_ts2)[1] + (k - 1) / 12  # time of the k-th (last training) observation

for (i in 1:2) {
  xtrain <- window(hos_ts2, end = st + i - 1)
  xtest <- window(hos_ts2, start = st + (i - 1) + 1/12, end = st + i)

  # Model 1: lowest AIC
  fit <- Arima(xtrain, order = c(0, 1, 2),
               seasonal = list(order = c(0, 1, 1), period = 12),
               include.drift = TRUE, method = "ML")
  # Forecast the full 12-month test window. The original used h = 1, which
  # silently recycled one forecast across all 12 test points and made the
  # horizon-vs-MAE plot meaningless.
  fcast <- forecast(fit, h = 12)

  # Model 2: second lowest AIC
  fit2 <- Arima(xtrain, order = c(1, 1, 1),
                seasonal = list(order = c(0, 1, 1), period = 12),
                include.drift = TRUE, method = "ML")
  fcast2 <- forecast(fit2, h = 12)

  mae1[i, ] <- abs(fcast$mean - xtest)
  mae2[i, ] <- abs(fcast2$mean - xtest)
}

# Plot mean MAE per horizon for both candidates, with 30% headroom on the y-axis.
max_mae <- max(c(colMeans(mae1, na.rm = TRUE), colMeans(mae2, na.rm = TRUE)),
               na.rm = TRUE)
ylim_range <- c(0, max_mae + max_mae * 0.3)
plot(1:12, colMeans(mae1, na.rm = TRUE), type = "l", col = 2,
     xlab = "Horizon", ylab = "MAE", ylim = ylim_range)
lines(1:12, colMeans(mae2, na.rm = TRUE), type = "l", col = 3)
legend("topleft",
       legend = c("Model with lowest AIC", "Model with second lowest AIC"),
       col = 2:3, lty = 1)
We can see the model with the lowest AIC actually performs better!
In the utilization rate for inpatient bed used for COVID time series, we select model ARIMA(0,1,2)(0,1,1)[12] (lowest AIC) and model ARIMA(0,1,2)(1,1,0)[12] (second lowest AIC).
Code
# Rolling-origin cross validation on the COVID bed utilization-rate series:
# ARIMA(0,1,2)(0,1,1)[12] (lowest AIC) vs ARIMA(0,1,2)(1,1,0)[12] (second lowest),
# compared by mean absolute error at forecast horizons 1..12 over 2 yearly folds.
# n = length(hos_ts3) = 49; n - k = 24 held-out observations -> 2 one-year folds.
k <- 25  # initial training window size in observations (was only in a comment;
         # the loop below errors without it being defined)
mae1 <- matrix(NA, 2, 12)
mae2 <- matrix(NA, 2, 12)
st <- tsp(hos_ts3)[1] + (k - 1) / 12  # time of the k-th (last training) observation

for (i in 1:2) {
  xtrain <- window(hos_ts3, end = st + i - 1)
  xtest <- window(hos_ts3, start = st + (i - 1) + 1/12, end = st + i)

  # Model 1: lowest AIC
  fit <- Arima(xtrain, order = c(0, 1, 2),
               seasonal = list(order = c(0, 1, 1), period = 12),
               include.drift = TRUE, method = "ML")
  # Forecast the full 12-month test window. The original used h = 1, which
  # silently recycled one forecast across all 12 test points and made the
  # horizon-vs-MAE plot meaningless.
  fcast <- forecast(fit, h = 12)

  # Model 2: second lowest AIC
  fit2 <- Arima(xtrain, order = c(0, 1, 2),
                seasonal = list(order = c(1, 1, 0), period = 12),
                include.drift = TRUE, method = "ML")
  fcast2 <- forecast(fit2, h = 12)

  mae1[i, ] <- abs(fcast$mean - xtest)
  mae2[i, ] <- abs(fcast2$mean - xtest)
}

# Plot mean MAE per horizon for both candidates, with 30% headroom on the y-axis.
max_mae <- max(c(colMeans(mae1, na.rm = TRUE), colMeans(mae2, na.rm = TRUE)),
               na.rm = TRUE)
ylim_range <- c(0, max_mae + max_mae * 0.3)
plot(1:12, colMeans(mae1, na.rm = TRUE), type = "l", col = 2,
     xlab = "Horizon", ylab = "MAE", ylim = ylim_range)
lines(1:12, colMeans(mae2, na.rm = TRUE), type = "l", col = 3)
legend("topleft",
       legend = c("Model with lowest AIC", "Model with second lowest AIC"),
       col = 2:3, lty = 1)
We can see the model with the second lowest AIC actually performs better!
In the support rate for Democratic time series, we select model ARIMA(0,1,1)(1,1,0)[12] (lowest AIC) and model ARIMA(1,1,1)(1,1,0)[12] (second lowest AIC).
Code
# Rolling-origin cross validation on the Democratic support-rate series:
# ARIMA(0,1,1)(1,1,0)[12] (lowest AIC) vs ARIMA(1,1,1)(1,1,0)[12] (second lowest),
# compared by mean absolute error at forecast horizons 1..12 over 2 yearly folds.
# n = length(demo_ts) = 49; n - k = 24 held-out observations -> 2 one-year folds.
k <- 25  # initial training window size in observations (was only in a comment;
         # the loop below errors without it being defined)
mae1 <- matrix(NA, 2, 12)
mae2 <- matrix(NA, 2, 12)
st <- tsp(demo_ts)[1] + (k - 1) / 12  # time of the k-th (last training) observation

for (i in 1:2) {
  xtrain <- window(demo_ts, end = st + i - 1)
  xtest <- window(demo_ts, start = st + (i - 1) + 1/12, end = st + i)

  # Model 1: lowest AIC
  fit <- Arima(xtrain, order = c(0, 1, 1),
               seasonal = list(order = c(1, 1, 0), period = 12),
               include.drift = TRUE, method = "ML")
  # Forecast the full 12-month test window. The original used h = 1, which
  # silently recycled one forecast across all 12 test points and made the
  # horizon-vs-MAE plot meaningless.
  fcast <- forecast(fit, h = 12)

  # Model 2: second lowest AIC
  fit2 <- Arima(xtrain, order = c(1, 1, 1),
                seasonal = list(order = c(1, 1, 0), period = 12),
                include.drift = TRUE, method = "ML")
  fcast2 <- forecast(fit2, h = 12)

  mae1[i, ] <- abs(fcast$mean - xtest)
  mae2[i, ] <- abs(fcast2$mean - xtest)
}

# Plot mean MAE per horizon for both candidates, with 30% headroom on the y-axis.
max_mae <- max(c(colMeans(mae1, na.rm = TRUE), colMeans(mae2, na.rm = TRUE)),
               na.rm = TRUE)
ylim_range <- c(0, max_mae + max_mae * 0.3)
plot(1:12, colMeans(mae1, na.rm = TRUE), type = "l", col = 2,
     xlab = "Horizon", ylab = "MAE", ylim = ylim_range)
lines(1:12, colMeans(mae2, na.rm = TRUE), type = "l", col = 3)
legend("topleft",
       legend = c("Model with lowest AIC", "Model with second lowest AIC"),
       col = 2:3, lty = 1)
We can see the model with the second lowest AIC actually performs better!
In the support rate for Independent time series, we select model ARIMA(0,1,1)(0,1,1)[12] (lowest AIC) and model ARIMA(1,1,1)(0,1,1)[12] (second lowest AIC).
Code
# Rolling-origin cross validation on the Independent support-rate series:
# ARIMA(0,1,1)(0,1,1)[12] (lowest AIC) vs ARIMA(1,1,1)(0,1,1)[12] (second lowest),
# compared by mean absolute error at forecast horizons 1..12 over 2 yearly folds.
# n = length(inde_ts) = 49; n - k = 24 held-out observations -> 2 one-year folds.
k <- 25  # initial training window size in observations (was only in a comment;
         # the loop below errors without it being defined)
mae1 <- matrix(NA, 2, 12)
mae2 <- matrix(NA, 2, 12)
st <- tsp(inde_ts)[1] + (k - 1) / 12  # time of the k-th (last training) observation

for (i in 1:2) {
  xtrain <- window(inde_ts, end = st + i - 1)
  xtest <- window(inde_ts, start = st + (i - 1) + 1/12, end = st + i)

  # Model 1: lowest AIC
  fit <- Arima(xtrain, order = c(0, 1, 1),
               seasonal = list(order = c(0, 1, 1), period = 12),
               include.drift = TRUE, method = "ML")
  # Forecast the full 12-month test window. The original used h = 1, which
  # silently recycled one forecast across all 12 test points and made the
  # horizon-vs-MAE plot meaningless.
  fcast <- forecast(fit, h = 12)

  # Model 2: second lowest AIC
  fit2 <- Arima(xtrain, order = c(1, 1, 1),
                seasonal = list(order = c(0, 1, 1), period = 12),
                include.drift = TRUE, method = "ML")
  fcast2 <- forecast(fit2, h = 12)

  mae1[i, ] <- abs(fcast$mean - xtest)
  mae2[i, ] <- abs(fcast2$mean - xtest)
}

# Plot mean MAE per horizon for both candidates, with 30% headroom on the y-axis.
max_mae <- max(c(colMeans(mae1, na.rm = TRUE), colMeans(mae2, na.rm = TRUE)),
               na.rm = TRUE)
ylim_range <- c(0, max_mae + max_mae * 0.3)
plot(1:12, colMeans(mae1, na.rm = TRUE), type = "l", col = 2,
     xlab = "Horizon", ylab = "MAE", ylim = ylim_range)
lines(1:12, colMeans(mae2, na.rm = TRUE), type = "l", col = 3)
legend("topleft",
       legend = c("Model with lowest AIC", "Model with second lowest AIC"),
       col = 2:3, lty = 1)
We can see the model with the second lowest AIC actually performs better!
In the support rate for Republican time series, we select model ARIMA(0,1,2)(0,1,1)[12] (lowest AIC) and model ARIMA(1,1,1)(0,1,1)[12] (second lowest AIC).
Code
# Rolling-origin cross validation on the Republican support-rate series:
# ARIMA(0,1,2)(0,1,1)[12] (lowest AIC) vs ARIMA(1,1,1)(0,1,1)[12] (second lowest),
# compared by mean absolute error at forecast horizons 1..12 over 2 yearly folds.
# n = length(rep_ts) = 49; n - k = 24 held-out observations -> 2 one-year folds.
k <- 25  # initial training window size in observations (was only in a comment;
         # the loop below errors without it being defined)
mae1 <- matrix(NA, 2, 12)
mae2 <- matrix(NA, 2, 12)
st <- tsp(rep_ts)[1] + (k - 1) / 12  # time of the k-th (last training) observation

for (i in 1:2) {
  xtrain <- window(rep_ts, end = st + i - 1)
  xtest <- window(rep_ts, start = st + (i - 1) + 1/12, end = st + i)

  # Model 1: lowest AIC
  fit <- Arima(xtrain, order = c(0, 1, 2),
               seasonal = list(order = c(0, 1, 1), period = 12),
               include.drift = TRUE, method = "ML")
  # Forecast the full 12-month test window. The original used h = 1, which
  # silently recycled one forecast across all 12 test points and made the
  # horizon-vs-MAE plot meaningless.
  fcast <- forecast(fit, h = 12)

  # Model 2: second lowest AIC
  fit2 <- Arima(xtrain, order = c(1, 1, 1),
                seasonal = list(order = c(0, 1, 1), period = 12),
                include.drift = TRUE, method = "ML")
  fcast2 <- forecast(fit2, h = 12)

  mae1[i, ] <- abs(fcast$mean - xtest)
  mae2[i, ] <- abs(fcast2$mean - xtest)
}

# Plot mean MAE per horizon for both candidates, with 30% headroom on the y-axis.
max_mae <- max(c(colMeans(mae1, na.rm = TRUE), colMeans(mae2, na.rm = TRUE)),
               na.rm = TRUE)
ylim_range <- c(0, max_mae + max_mae * 0.3)
plot(1:12, colMeans(mae1, na.rm = TRUE), type = "l", col = 2,
     xlab = "Horizon", ylab = "MAE", ylim = ylim_range)
lines(1:12, colMeans(mae2, na.rm = TRUE), type = "l", col = 3)
legend("topleft",
       legend = c("Model with lowest AIC", "Model with second lowest AIC"),
       col = 2:3, lty = 1)
We can see the model with the second lowest AIC actually performs better!